├── .envrc
├── .gitignore
├── src
    ├── unicode
    │   ├── ICU_SHA
    │   ├── utf.h
    │   ├── ptypes.h
    │   ├── urename.h
    │   ├── README.md
    │   └── umachine.h
    ├── ts_assert.h
    ├── lib.c
    ├── error_costs.h
    ├── wasm
    │   ├── stdlib-symbols.txt
    │   └── stdlib.c
    ├── host.h
    ├── tree.h
    ├── reduce_action.h
    ├── get_changed_ranges.h
    ├── alloc.h
    ├── wasm_store.h
    ├── tree_cursor.h
    ├── length.h
    ├── lexer.h
    ├── point.h
    ├── atomic.h
    ├── alloc.c
    ├── unicode.h
    ├── reusable_node.h
    ├── clock.h
    ├── stack.h
    ├── tree.c
    ├── portable
    │   └── endian.h
    ├── parser.h
    ├── language.h
    ├── language.c
    ├── array.h
    ├── subtree.h
    ├── lexer.c
    └── get_changed_ranges.c
├── .gitmodules
├── allocator.h
├── logger.go
├── dup_unix.go
├── copy.sh
├── tree_sitter.go
├── dup_windows.go
├── allocator.c
├── .github
    ├── dependabot.yml
    ├── FUNDING.yml
    └── workflows
    │   ├── ci.yml
    │   └── copy.yml
├── flake.lock
├── point.go
├── edit.go
├── language_test.go
├── go.mod
├── LICENSE
├── ranges.go
├── tree_cursor_test.go
├── lookahead_iterator_test.go
├── flake.nix
├── lookahead_iterator.go
├── README.md
├── go.sum
├── allocator.go
├── edit_test.go
├── tree.go
├── language.go
├── tree_cursor.go
└── node.go


/.envrc:
--------------------------------------------------------------------------------
1 | use flake
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .direnv
2 | 


--------------------------------------------------------------------------------
/src/unicode/ICU_SHA:
--------------------------------------------------------------------------------
1 | 552b01f61127d30d6589aa4bf99468224979b661
2 | 


--------------------------------------------------------------------------------
/src/unicode/utf.h:
--------------------------------------------------------------------------------
1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used.
2 | 


--------------------------------------------------------------------------------
/src/unicode/ptypes.h:
--------------------------------------------------------------------------------
1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used.
2 | 


--------------------------------------------------------------------------------
/src/unicode/urename.h:
--------------------------------------------------------------------------------
1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used.
2 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tree-sitter"]
2 | 	path = tree-sitter
3 | 	url = https://github.com/tree-sitter/tree-sitter
4 | 


--------------------------------------------------------------------------------
/allocator.h:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | void *c_malloc_fn(size_t size);
 4 | 
 5 | void *c_calloc_fn(size_t num, size_t size);
 6 | 
 7 | void *c_realloc_fn(void *ptr, size_t size);
 8 | 
 9 | void c_free_fn(void *ptr);
10 | 


--------------------------------------------------------------------------------
/logger.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter
 2 | 
 3 | type LogType int
 4 | 
 5 | const (
 6 | 	LogTypeParse LogType = iota
 7 | 	LogTypeLex
 8 | )
 9 | 
10 | // Logger is a callback that receives log messages during parsing.
11 | type Logger = func(LogType, string)
12 | 


--------------------------------------------------------------------------------
/dup_unix.go:
--------------------------------------------------------------------------------
 1 | //go:build unix
 2 | 
 3 | package tree_sitter
 4 | 
 5 | /*
 6 | #include <unistd.h>
 7 | */
 8 | import "C"
 9 | 
10 | // dupeFD duplicates the file descriptor fd by calling dup from <unistd.h>
11 | // and returns whatever dup returns (per POSIX, the new descriptor, or -1 on
12 | // failure).
13 | //
14 | // The file is built for every Unix-like GOOS rather than only linux and
15 | // darwin, so the package also compiles on the BSDs, illumos, and so on.
16 | // Windows has its own implementation in dup_windows.go.
17 | func dupeFD(fd uintptr) int {
18 | 	return int(C.dup(C.int(fd)))
19 | }
20 | 


--------------------------------------------------------------------------------
/src/ts_assert.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_ASSERT_H_
 2 | #define TREE_SITTER_ASSERT_H_
 3 | 
 4 | #ifdef NDEBUG
 5 | #define ts_assert(e) ((void)(e))
 6 | #else
 7 | #include <assert.h>
 8 | #define ts_assert(e) assert(e)
 9 | #endif
10 | 
11 | #endif // TREE_SITTER_ASSERT_H_
12 | 


--------------------------------------------------------------------------------
/copy.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Vendors the tree-sitter C library into this repository by copying the `src`
 4 | # and `include` directories of the `tree-sitter` checkout into the current
 5 | # directory. SRC_DIR is a relative path, so run this from the repository root.
 6 | 
 7 | # Abort on the first failing command, unset variable, or failed pipeline stage
 8 | # so that a failed copy is never reported as success.
 9 | set -euo pipefail
10 | 
11 | SRC_DIR="tree-sitter/lib"
12 | 
13 | if [ ! -d "$SRC_DIR/src" ] || [ ! -d "$SRC_DIR/include" ]; then
14 | 	echo "Error: source directories do not exist." >&2
15 | 	exit 1
16 | fi
17 | 
18 | # No trailing slash on the sources: BSD/macOS `cp -r dir/ dest` copies the
19 | # *contents* of dir into dest, whereas GNU cp copies the directory itself.
20 | # Without the slash both variants create or refresh ./src and ./include.
21 | cp -r "$SRC_DIR/src" "."
22 | cp -r "$SRC_DIR/include" "."
23 | 


--------------------------------------------------------------------------------
/tree_sitter.go:
--------------------------------------------------------------------------------
1 | package tree_sitter
2 | 
3 | /*
4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
5 | #include <tree_sitter/api.h>
6 | #include "lib.c" // <- This is needed to build the C library from the C source code, but cannot be included in files that have other declarations.
7 | */
8 | import "C"
9 | 


--------------------------------------------------------------------------------
/src/lib.c:
--------------------------------------------------------------------------------
 1 | #include "./alloc.c"
 2 | #include "./get_changed_ranges.c"
 3 | #include "./language.c"
 4 | #include "./lexer.c"
 5 | #include "./node.c"
 6 | #include "./parser.c"
 7 | #include "./query.c"
 8 | #include "./stack.c"
 9 | #include "./subtree.c"
10 | #include "./tree_cursor.c"
11 | #include "./tree.c"
12 | #include "./wasm_store.c"
13 | 


--------------------------------------------------------------------------------
/src/error_costs.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_ERROR_COSTS_H_
 2 | #define TREE_SITTER_ERROR_COSTS_H_
 3 | 
 4 | #define ERROR_STATE 0
 5 | #define ERROR_COST_PER_RECOVERY 500
 6 | #define ERROR_COST_PER_MISSING_TREE 110
 7 | #define ERROR_COST_PER_SKIPPED_TREE 100
 8 | #define ERROR_COST_PER_SKIPPED_LINE 30
 9 | #define ERROR_COST_PER_SKIPPED_CHAR 1
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/dup_windows.go:
--------------------------------------------------------------------------------
 1 | //go:build windows
 2 | 
 3 | package tree_sitter
 4 | 
 5 | /*
 6 | #include <windows.h>
 7 | HANDLE _ts_dup(HANDLE handle);
 8 | */
 9 | import "C"
10 | import "unsafe"
11 | 
12 | // Wrapper for Windows systems
13 | func dupeFD(handle uintptr) uintptr {
14 | 	hHandle := C.HANDLE(unsafe.Pointer(handle))
15 | 	return uintptr(unsafe.Pointer(C._ts_dup(hHandle)))
16 | }
17 | 


--------------------------------------------------------------------------------
/src/wasm/stdlib-symbols.txt:
--------------------------------------------------------------------------------
 1 | "calloc",
 2 | "free",
 3 | "iswalnum",
 4 | "iswalpha",
 5 | "iswblank",
 6 | "iswdigit",
 7 | "iswlower",
 8 | "iswspace",
 9 | "iswupper",
10 | "iswxdigit",
11 | "malloc",
12 | "memchr",
13 | "memcmp",
14 | "memcpy",
15 | "memmove",
16 | "memset",
17 | "realloc",
18 | "strcmp",
19 | "strlen",
20 | "strncat",
21 | "strncmp",
22 | "strncpy",
23 | "towlower",
24 | "towupper",
25 | 


--------------------------------------------------------------------------------
/allocator.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | extern void *go_malloc(size_t size);
 4 | extern void *go_calloc(size_t num, size_t size);
 5 | extern void *go_realloc(void *ptr, size_t size);
 6 | extern void go_free(void *ptr);
 7 | 
 8 | void *c_malloc_fn(size_t size) { return go_malloc(size); }
 9 | 
10 | void *c_calloc_fn(size_t num, size_t size) { return go_calloc(num, size); }
11 | 
12 | void *c_realloc_fn(void *ptr, size_t size) { return go_realloc(ptr, size); }
13 | 
14 | void c_free_fn(void *ptr) { go_free(ptr); }
15 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: github-actions
 4 |     directory: /
 5 |     schedule:
 6 |       interval: weekly
 7 |       day: saturday
 8 |     commit-message:
 9 |       prefix: ci
10 |     groups:
11 |       actions:
12 |         patterns: ["*"]
13 |     labels: [dependencies]
14 |     open-pull-requests-limit: 1
15 |   - package-ecosystem: gitsubmodule
16 |     directory: /
17 |     schedule:
18 |       interval: weekly
19 |       day: sunday
20 |     commit-message:
21 |       prefix: build
22 |     labels: [dependencies]
23 |     open-pull-requests-limit: 1
24 | 


--------------------------------------------------------------------------------
/src/host.h:
--------------------------------------------------------------------------------
 1 | 
 2 | // Determine endian and pointer size based on known defines.
 3 | // TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
 4 | // to override this.
 5 | 
 6 | #if !defined(TS_BIG_ENDIAN)
 7 | #if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
 8 |   || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
 9 | #define TS_BIG_ENDIAN 1
10 | #else
11 | #define TS_BIG_ENDIAN 0
12 | #endif
13 | #endif
14 | 
15 | #if !defined(TS_PTR_SIZE)
16 | #if UINTPTR_MAX == 0xFFFFFFFF
17 | #define TS_PTR_SIZE 32
18 | #else
19 | #define TS_PTR_SIZE 64
20 | #endif
21 | #endif
22 | 


--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |   "nodes": {
 3 |     "nixpkgs": {
 4 |       "locked": {
 5 |         "lastModified": 1762596750,
 6 |         "narHash": "sha256-rXXuz51Bq7DHBlfIjN7jO8Bu3du5TV+3DSADBX7/9YQ=",
 7 |         "owner": "NixOS",
 8 |         "repo": "nixpkgs",
 9 |         "rev": "b6a8526db03f735b89dd5ff348f53f752e7ddc8e",
10 |         "type": "github"
11 |       },
12 |       "original": {
13 |         "owner": "NixOS",
14 |         "ref": "nixos-unstable",
15 |         "repo": "nixpkgs",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "root": {
20 |       "inputs": {
21 |         "nixpkgs": "nixpkgs"
22 |       }
23 |     }
24 |   },
25 |   "root": "root",
26 |   "version": 7
27 | }
28 | 


--------------------------------------------------------------------------------
/point.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter
 2 | 
 3 | /*
 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
 5 | #include <tree_sitter/api.h>
 6 | */
 7 | import "C"
 8 | 
 9 | // A position in a multi-line text document, in terms of rows and columns.
10 | //
11 | // Rows and columns are zero-based.
12 | type Point struct {
13 | 	Row    uint
14 | 	Column uint
15 | }
16 | 
17 | func NewPoint(row, column uint) Point {
18 | 	return Point{Row: row, Column: column}
19 | }
20 | 
21 | func (p *Point) toTSPoint() C.TSPoint {
22 | 	return C.TSPoint{
23 | 		row:    C.uint32_t(p.Row),
24 | 		column: C.uint32_t(p.Column),
25 | 	}
26 | }
27 | 
28 | func (p *Point) fromTSPoint(tp C.TSPoint) {
29 | 	p.Row = uint(tp.row)
30 | 	p.Column = uint(tp.column)
31 | }
32 | 


--------------------------------------------------------------------------------
/edit.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter
 2 | 
 3 | /*
 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
 5 | #include <tree_sitter/api.h>
 6 | */
 7 | import "C"
 8 | 
 9 | type InputEdit struct {
10 | 	StartByte      uint
11 | 	OldEndByte     uint
12 | 	NewEndByte     uint
13 | 	StartPosition  Point
14 | 	OldEndPosition Point
15 | 	NewEndPosition Point
16 | }
17 | 
18 | // toTSInputEdit returns a pointer to a new C.TSInputEdit holding the same
19 | // byte offsets and positions as i.
20 | //
21 | // The byte offsets are converted with C.uint32_t, the fixed-width type that
22 | // Point.toTSPoint already uses, rather than the platform-dependent C.uint.
23 | // Offsets that do not fit in 32 bits are truncated by the conversion.
24 | func (i *InputEdit) toTSInputEdit() *C.TSInputEdit {
25 | 	return &C.TSInputEdit{
26 | 		start_byte:    C.uint32_t(i.StartByte),
27 | 		old_end_byte:  C.uint32_t(i.OldEndByte),
28 | 		new_end_byte:  C.uint32_t(i.NewEndByte),
29 | 		start_point:   i.StartPosition.toTSPoint(),
30 | 		old_end_point: i.OldEndPosition.toTSPoint(),
31 | 		new_end_point: i.NewEndPosition.toTSPoint(),
32 | 	}
33 | }
34 | 


--------------------------------------------------------------------------------
/src/tree.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_TREE_H_
 2 | #define TREE_SITTER_TREE_H_
 3 | 
 4 | #include "./subtree.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | typedef struct {
11 |   const Subtree *child;
12 |   const Subtree *parent;
13 |   Length position;
14 |   TSSymbol alias_symbol;
15 | } ParentCacheEntry;
16 | 
17 | struct TSTree {
18 |   Subtree root;
19 |   const TSLanguage *language;
20 |   TSRange *included_ranges;
21 |   unsigned included_range_count;
22 | };
23 | 
24 | TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count);
25 | TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias);
26 | 
27 | #ifdef __cplusplus
28 | }
29 | #endif
30 | 
31 | #endif  // TREE_SITTER_TREE_H_
32 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: tree-sitter
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: tree-sitter # Replace with a single Open Collective username
 6 | ko_fi: amaanq
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12 | polar: # Replace with a single Polar username
13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
14 | thanks_dev: # Replace with a single thanks.dev username
15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
16 | 


--------------------------------------------------------------------------------
/src/reduce_action.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_REDUCE_ACTION_H_
 2 | #define TREE_SITTER_REDUCE_ACTION_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include "./array.h"
 9 | #include "tree_sitter/api.h"
10 | 
11 | typedef struct {
12 |   uint32_t count;
13 |   TSSymbol symbol;
14 |   int dynamic_precedence;
15 |   unsigned short production_id;
16 | } ReduceAction;
17 | 
18 | typedef Array(ReduceAction) ReduceActionSet;
19 | 
20 | static inline void ts_reduce_action_set_add(ReduceActionSet *self,
21 |                                             ReduceAction new_action) {
22 |   for (uint32_t i = 0; i < self->size; i++) {
23 |     ReduceAction action = self->contents[i];
24 |     if (action.symbol == new_action.symbol && action.count == new_action.count)
25 |       return;
26 |   }
27 |   array_push(self, new_action);
28 | }
29 | 
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 | 
34 | #endif  // TREE_SITTER_REDUCE_ACTION_H_
35 | 


--------------------------------------------------------------------------------
/language_test.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter_test
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | )
 8 | 
 9 | func TestSymbolMetadataChecks(t *testing.T) {
10 | 	language := getLanguage("rust")
11 | 	for id := range language.NodeKindCount() {
12 | 		name := language.NodeKindForId(uint16(id))
13 | 
14 | 		switch name {
15 | 		case "_type", "_expression", "_pattern", "_literal", "_literal_pattern", "_declaration_statement":
16 | 			assert.True(t, language.NodeKindIsSupertype(uint16(id)))
17 | 
18 | 		case "_raw_string_literal_start", "_raw_string_literal_end", "_line_doc_comment", "_error_sentinel":
19 | 			assert.False(t, language.NodeKindIsSupertype(uint16(id)))
20 | 
21 | 		case "enum_item", "struct_item", "type_item":
22 | 			assert.True(t, language.NodeKindIsNamed(uint16(id)))
23 | 
24 | 		case "=>", "[", "]", "(", ")", "{", "}":
25 | 			assert.True(t, language.NodeKindIsVisible(uint16(id)))
26 | 		}
27 | 	}
28 | }
29 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/tree-sitter/go-tree-sitter
 2 | 
 3 | go 1.23
 4 | 
 5 | require (
 6 | 	github.com/mattn/go-pointer v0.0.1
 7 | 	github.com/stretchr/testify v1.10.0
 8 | 	github.com/tree-sitter/tree-sitter-c v0.23.4
 9 | 	github.com/tree-sitter/tree-sitter-cpp v0.23.4
10 | 	github.com/tree-sitter/tree-sitter-embedded-template v0.23.2
11 | 	github.com/tree-sitter/tree-sitter-go v0.23.4
12 | 	github.com/tree-sitter/tree-sitter-html v0.23.2
13 | 	github.com/tree-sitter/tree-sitter-java v0.23.5
14 | 	github.com/tree-sitter/tree-sitter-javascript v0.23.1
15 | 	github.com/tree-sitter/tree-sitter-json v0.24.8
16 | 	github.com/tree-sitter/tree-sitter-php v0.23.11
17 | 	github.com/tree-sitter/tree-sitter-python v0.23.6
18 | 	github.com/tree-sitter/tree-sitter-ruby v0.23.1
19 | 	github.com/tree-sitter/tree-sitter-rust v0.23.2
20 | )
21 | 
22 | require (
23 | 	github.com/davecgh/go-spew v1.1.1 // indirect
24 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
25 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
26 | )
27 | 


--------------------------------------------------------------------------------
/src/get_changed_ranges.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
 2 | #define TREE_SITTER_GET_CHANGED_RANGES_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include "./tree_cursor.h"
 9 | #include "./subtree.h"
10 | 
11 | typedef Array(TSRange) TSRangeArray;
12 | 
13 | void ts_range_array_get_changed_ranges(
14 |   const TSRange *old_ranges, unsigned old_range_count,
15 |   const TSRange *new_ranges, unsigned new_range_count,
16 |   TSRangeArray *differences
17 | );
18 | 
19 | bool ts_range_array_intersects(
20 |   const TSRangeArray *self, unsigned start_index,
21 |   uint32_t start_byte, uint32_t end_byte
22 | );
23 | 
24 | unsigned ts_subtree_get_changed_ranges(
25 |   const Subtree *old_tree, const Subtree *new_tree,
26 |   TreeCursor *cursor1, TreeCursor *cursor2,
27 |   const TSLanguage *language,
28 |   const TSRangeArray *included_range_differences,
29 |   TSRange **ranges
30 | );
31 | 
32 | #ifdef __cplusplus
33 | }
34 | #endif
35 | 
36 | #endif  // TREE_SITTER_GET_CHANGED_RANGES_H_
37 | 


--------------------------------------------------------------------------------
/src/alloc.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_ALLOC_H_
 2 | #define TREE_SITTER_ALLOC_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <stdbool.h>
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | 
12 | #if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
13 | #define TS_PUBLIC
14 | #else
15 | #define TS_PUBLIC __attribute__((visibility("default")))
16 | #endif
17 | 
18 | TS_PUBLIC extern void *(*ts_current_malloc)(size_t size);
19 | TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size);
20 | TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size);
21 | TS_PUBLIC extern void (*ts_current_free)(void *ptr);
22 | 
23 | // Allow clients to override allocation functions
24 | #ifndef ts_malloc
25 | #define ts_malloc  ts_current_malloc
26 | #endif
27 | #ifndef ts_calloc
28 | #define ts_calloc  ts_current_calloc
29 | #endif
30 | #ifndef ts_realloc
31 | #define ts_realloc ts_current_realloc
32 | #endif
33 | #ifndef ts_free
34 | #define ts_free    ts_current_free
35 | #endif
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif // TREE_SITTER_ALLOC_H_
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2024 Amaan Qureshi <amaanq12@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/ranges.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter
 2 | 
 3 | /*
 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
 5 | #include <tree_sitter/api.h>
 6 | */
 7 | import "C"
 8 | import "fmt"
 9 | 
10 | // A range of positions in a multi-line text document, both in terms of bytes
11 | // and of rows and columns.
12 | type Range struct {
13 | 	StartByte  uint
14 | 	EndByte    uint
15 | 	StartPoint Point
16 | 	EndPoint   Point
17 | }
18 | 
19 | // An error that occurred in [Parser.SetIncludedRanges].
20 | type IncludedRangesError struct {
21 | 	Index uint32
22 | }
23 | 
24 | func (r *Range) ToTSRange() C.TSRange {
25 | 	return C.TSRange{
26 | 		start_byte:  C.uint32_t(r.StartByte),
27 | 		end_byte:    C.uint32_t(r.EndByte),
28 | 		start_point: r.StartPoint.toTSPoint(),
29 | 		end_point:   r.EndPoint.toTSPoint(),
30 | 	}
31 | }
32 | 
33 | func (r *Range) FromTSRange(tr C.TSRange) {
34 | 	r.StartByte = uint(tr.start_byte)
35 | 	r.EndByte = uint(tr.end_byte)
36 | 	r.StartPoint.fromTSPoint(tr.start_point)
37 | 	r.EndPoint.fromTSPoint(tr.end_point)
38 | }
39 | 
40 | func (i *IncludedRangesError) Error() string {
41 | 	return fmt.Sprintf("Incorrect range by index: %d", i.Index)
42 | }
43 | 


--------------------------------------------------------------------------------
/src/wasm_store.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_WASM_H_
 2 | #define TREE_SITTER_WASM_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include "tree_sitter/api.h"
 9 | #include "./parser.h"
10 | 
11 | bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language);
12 | void ts_wasm_store_reset(TSWasmStore *self);
13 | bool ts_wasm_store_has_error(const TSWasmStore *self);
14 | 
15 | bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state);
16 | bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state);
17 | 
18 | uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self);
19 | void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address);
20 | bool ts_wasm_store_call_scanner_scan(TSWasmStore *self, uint32_t scanner_address, uint32_t valid_tokens_ix);
21 | uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *self, uint32_t scanner_address, char *buffer);
22 | void ts_wasm_store_call_scanner_deserialize(TSWasmStore *self, uint32_t scanner, const char *buffer, unsigned length);
23 | 
24 | void ts_wasm_language_retain(const TSLanguage *self);
25 | void ts_wasm_language_release(const TSLanguage *self);
26 | 
27 | #ifdef __cplusplus
28 | }
29 | #endif
30 | 
31 | #endif  // TREE_SITTER_WASM_H_
32 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [master]
 6 |   pull_request:
 7 | 
 8 | jobs:
 9 |   test:
10 |     strategy:
11 |       matrix:
12 |         os: [ubuntu-latest, macos-latest, windows-latest]
13 |     runs-on: ${{ matrix.os }}
14 | 
15 |     steps:
16 |       - name: Checkout
17 |         uses: actions/checkout@v4
18 |         with:
19 |           submodules: "recursive"
20 | 
21 |       - name: Set up Go
22 |         uses: actions/setup-go@v5
23 |         with:
24 |           go-version: "1.23"
25 | 
26 |       - name: Build
27 |         run: go build -v ./...
28 | 
29 |       - name: Test
30 |         run: go test -v ./...
31 | 
32 |   nix:
33 |     strategy:
34 |       matrix:
35 |         os: [ubuntu-latest, macos-latest]
36 |     runs-on: ${{ matrix.os }}
37 | 
38 |     steps:
39 |       - name: Checkout
40 |         uses: actions/checkout@v4
41 |         with:
42 |           submodules: "recursive"
43 | 
44 |       - name: Set up Nix
45 |         uses: DeterminateSystems/nix-installer-action@main
46 | 
47 |       - name: Set up Nix Cache
48 |         uses: DeterminateSystems/magic-nix-cache-action@main
49 |         with:
50 |           use-flakehub: false
51 | 
52 |       - name: Build and test with Nix
53 |         run: nix build
54 | 


--------------------------------------------------------------------------------
/src/unicode/README.md:
--------------------------------------------------------------------------------
 1 | # ICU Parts
 2 | 
 3 | This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
 4 | 
 5 | ### License
 6 | 
 7 | The license for these files is contained in the `LICENSE` file within this directory.
 8 | 
 9 | ### Contents
10 | 
11 | * Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
12 |   * `utf8.h`
13 |   * `utf16.h`
14 |   * `umachine.h`
15 | * Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
16 |   * `ptypes.h`
17 |   * `urename.h`
18 |   * `utf.h`
19 | * `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
20 | * `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
21 | * `README.md` - This text file.
22 | 
23 | ### Updating ICU
24 | 
25 | To incorporate changes from the upstream `icu` repository:
26 | 
27 | * Update `ICU_SHA` with the new Git SHA.
28 | * Update `LICENSE` with the license text from the directory mentioned above.
29 | * Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.
30 | 


--------------------------------------------------------------------------------
/src/tree_cursor.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_TREE_CURSOR_H_
 2 | #define TREE_SITTER_TREE_CURSOR_H_
 3 | 
 4 | #include "./subtree.h"
 5 | 
 6 | typedef struct {
 7 |   const Subtree *subtree;
 8 |   Length position;
 9 |   uint32_t child_index;
10 |   uint32_t structural_child_index;
11 |   uint32_t descendant_index;
12 | } TreeCursorEntry;
13 | 
14 | typedef struct {
15 |   const TSTree *tree;
16 |   Array(TreeCursorEntry) stack;
17 |   TSSymbol root_alias_symbol;
18 | } TreeCursor;
19 | 
20 | typedef enum {
21 |   TreeCursorStepNone,
22 |   TreeCursorStepHidden,
23 |   TreeCursorStepVisible,
24 | } TreeCursorStep;
25 | 
26 | void ts_tree_cursor_init(TreeCursor *self, TSNode node);
27 | void ts_tree_cursor_current_status(
28 |   const TSTreeCursor *_self,
29 |   TSFieldId *field_id,
30 |   bool *has_later_siblings,
31 |   bool *has_later_named_siblings,
32 |   bool *can_have_later_siblings_with_this_field,
33 |   TSSymbol *supertypes,
34 |   unsigned *supertype_count
35 | );
36 | 
37 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self);
38 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self);
39 | 
40 | static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
41 |   const TreeCursor *self = (const TreeCursor *)_self;
42 |   TreeCursorEntry *last_entry = array_back(&self->stack);
43 |   return *last_entry->subtree;
44 | }
45 | 
46 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self);
47 | 
48 | #endif  // TREE_SITTER_TREE_CURSOR_H_
49 | 


--------------------------------------------------------------------------------
/src/length.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_LENGTH_H_
 2 | #define TREE_SITTER_LENGTH_H_
 3 | 
 4 | #include <stdlib.h>
 5 | #include <stdbool.h>
 6 | #include "./point.h"
 7 | #include "tree_sitter/api.h"
 8 | 
 9 | typedef struct {
10 |   uint32_t bytes;
11 |   TSPoint extent;
12 | } Length;
13 | 
14 | static const Length LENGTH_UNDEFINED = {0, {0, 1}};
15 | static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
16 | 
17 | static inline bool length_is_undefined(Length length) {
18 |   return length.bytes == 0 && length.extent.column != 0;
19 | }
20 | 
21 | static inline Length length_min(Length len1, Length len2) {
22 |   return (len1.bytes < len2.bytes) ? len1 : len2;
23 | }
24 | 
25 | static inline Length length_add(Length len1, Length len2) {
26 |   Length result;
27 |   result.bytes = len1.bytes + len2.bytes;
28 |   result.extent = point_add(len1.extent, len2.extent);
29 |   return result;
30 | }
31 | 
32 | static inline Length length_sub(Length len1, Length len2) {
33 |   Length result;
34 |   result.bytes = (len1.bytes >= len2.bytes) ? len1.bytes - len2.bytes : 0;
35 |   result.extent = point_sub(len1.extent, len2.extent);
36 |   return result;
37 | }
38 | 
39 | static inline Length length_zero(void) {
40 |   Length result = {0, {0, 0}};
41 |   return result;
42 | }
43 | 
44 | static inline Length length_saturating_sub(Length len1, Length len2) {
45 |   if (len1.bytes > len2.bytes) {
46 |     return length_sub(len1, len2);
47 |   } else {
48 |     return length_zero();
49 |   }
50 | }
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/src/lexer.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_LEXER_H_
 2 | #define TREE_SITTER_LEXER_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include "./length.h"
 9 | #include "./subtree.h"
10 | #include "tree_sitter/api.h"
11 | #include "./parser.h"
12 | 
13 | typedef struct {  // A column number paired with a flag saying whether it can be trusted.
14 |   uint32_t value;  // the column value itself
15 |   bool valid;  // whether `value` currently holds a usable column
16 | } ColumnData;
17 | 
18 | typedef struct {  // Lexer state: a TSLexer plus position, input and range bookkeeping.
19 |   TSLexer data;  // declared first, so a Lexer and its TSLexer start at the same address
20 |   Length current_position;
21 |   Length token_start_position;
22 |   Length token_end_position;  // all three positions are Length values (bytes plus point)
23 | 
24 |   TSRange *included_ranges;  // presumably holds included_range_count entries — confirm in lexer.c
25 |   const char *chunk;  // presumably the input chunk currently loaded (see chunk_start/chunk_size) — confirm in lexer.c
26 |   TSInput input;  // input source, assigned through ts_lexer_set_input
27 |   TSLogger logger;
28 | 
29 |   uint32_t included_range_count;
30 |   uint32_t current_included_range_index;  // presumably an index into included_ranges — confirm in lexer.c
31 |   uint32_t chunk_start;
32 |   uint32_t chunk_size;
33 |   uint32_t lookahead_size;
34 |   bool did_get_column;
35 |   ColumnData column_data;  // column value plus validity flag
36 | 
37 |   char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];  // fixed-size scratch buffer; presumably for log messages — confirm
38 | } Lexer;
39 | 
40 | void ts_lexer_init(Lexer *self);  // Lexer API declarations; the definitions are not part of this header
41 | void ts_lexer_delete(Lexer *self);
42 | void ts_lexer_set_input(Lexer *self, TSInput input);
43 | void ts_lexer_reset(Lexer *self, Length position);
44 | void ts_lexer_start(Lexer *self);
45 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte);  // lookahead_end_byte is presumably an out-parameter — confirm in lexer.c
46 | void ts_lexer_mark_end(Lexer *self);
47 | bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);  // takes `count` ranges; returns a bool status
48 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);  // count is presumably an out-parameter for the number of ranges — confirm
49 | 
50 | #ifdef __cplusplus
51 | }
52 | #endif
53 | 
54 | #endif  // TREE_SITTER_LEXER_H_
55 | 


--------------------------------------------------------------------------------
/src/point.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_POINT_H_
 2 | #define TREE_SITTER_POINT_H_
 3 | 
 4 | #include "tree_sitter/api.h"
 5 | 
 6 | #define POINT_ZERO ((TSPoint) {0, 0})  // both fields zero
 7 | #define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})  // both fields at UINT32_MAX
 8 | 
 9 | static inline TSPoint point__new(unsigned row, unsigned column) {
10 |   // Bundle the two coordinates into a TSPoint value.
11 |   return (TSPoint) {row, column};
12 | }
13 | 
14 | static inline TSPoint point_add(TSPoint a, TSPoint b) {
15 |   // If b spans rows, its column replaces a's; otherwise the columns accumulate.
16 |   return (b.row > 0)
17 |     ? point__new(a.row + b.row, b.column)
18 |     : point__new(a.row, a.column + b.column);
19 | }
20 | 
21 | static inline TSPoint point_sub(TSPoint a, TSPoint b) {
22 |   // a on a later row: keep a's column. Otherwise row 0 and a column difference clamped at zero.
23 |   if (a.row > b.row) return point__new(a.row - b.row, a.column);
24 |   uint32_t column = (a.column >= b.column) ? a.column - b.column : 0;
25 |   return point__new(0, column);
26 | }
27 | 
28 | static inline bool point_lte(TSPoint a, TSPoint b) {
29 |   return (a.row != b.row) ? (a.row < b.row) : (a.column <= b.column);  // rows decide first; columns break ties
30 | }
31 | 
32 | static inline bool point_lt(TSPoint a, TSPoint b) {
33 |   return (a.row != b.row) ? (a.row < b.row) : (a.column < b.column);  // rows decide first; columns break ties
34 | }
35 | 
36 | static inline bool point_gt(TSPoint a, TSPoint b) {
37 |   return (a.row != b.row) ? (a.row > b.row) : (a.column > b.column);  // rows decide first; columns break ties
38 | }
39 | 
40 | static inline bool point_gte(TSPoint a, TSPoint b) {
41 |   return (a.row != b.row) ? (a.row > b.row) : (a.column >= b.column);  // rows decide first; columns break ties
42 | }
43 | 
44 | static inline bool point_eq(TSPoint a, TSPoint b) {
45 |   return !(a.row != b.row || a.column != b.column);  // equal only when both row and column match
46 | }
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/tree_cursor_test.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter_test
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 
 6 | 	. "github.com/tree-sitter/go-tree-sitter"
 7 | 	tree_sitter_go "github.com/tree-sitter/tree-sitter-go/bindings/go"
 8 | )
 9 | 
10 | func ExampleTreeCursor() { // walks a parsed Go snippet with a TreeCursor; printed values are checked against the Output block
11 | 	parser := NewParser()
12 | 	defer parser.Close()
13 | 
14 | 	language := NewLanguage(tree_sitter_go.Language())
15 | 
16 | 	parser.SetLanguage(language)
17 | 
18 | 	tree := parser.Parse(
19 | 		[]byte(`
20 | 			package main
21 | 
22 | 
23 | 			func main() {
24 | 				return
25 | 			}
26 | 		`),
27 | 		nil,
28 | 	)
29 | 	defer tree.Close()
30 | 
31 | 	cursor := tree.Walk()
32 | 	defer cursor.Close()
33 | 
34 | 	fmt.Println(cursor.Node().Kind()) // the cursor starts on the root: source_file
35 | 
36 | 	fmt.Println(cursor.GotoFirstChild()) // down to package_clause
37 | 	fmt.Println(cursor.Node().Kind())
38 | 
39 | 	fmt.Println(cursor.GotoFirstChild()) // down to the `package` keyword
40 | 	fmt.Println(cursor.Node().Kind())
41 | 
42 | 	// Returns `false` because the `package` node has no children
43 | 	fmt.Println(cursor.GotoFirstChild())
44 | 
45 | 	fmt.Println(cursor.GotoNextSibling()) // across to package_identifier
46 | 	fmt.Println(cursor.Node().Kind())
47 | 
48 | 	fmt.Println(cursor.GotoParent()) // back up to package_clause
49 | 	fmt.Println(cursor.Node().Kind())
50 | 
51 | 	fmt.Println(cursor.GotoNextSibling()) // moves on to function_declaration
52 | 	fmt.Println(cursor.GotoNextSibling()) // prints false: there is no further sibling, the cursor stays put
53 | 	fmt.Println(cursor.Node().Kind())
54 | 
55 | 	// Output:
56 | 	// source_file
57 | 	// true
58 | 	// package_clause
59 | 	// true
60 | 	// package
61 | 	// false
62 | 	// true
63 | 	// package_identifier
64 | 	// true
65 | 	// package_clause
66 | 	// true
67 | 	// false
68 | 	// function_declaration
69 | }
70 | 


--------------------------------------------------------------------------------
/.github/workflows/copy.yml:
--------------------------------------------------------------------------------
 1 | name: Copy and Sync Tree-Sitter Files
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: "0 0 * * 0" # weekly
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   sync:
10 |     runs-on: ubuntu-latest
11 | 
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v4
15 |         with:
16 |           submodules: "recursive"
17 | 
18 |       - name: Run copy script
19 |         run: ./copy.sh
20 | 
21 |       - name: Update submodule & Verify no changes  # fails on purpose when the sync changed files, so the failure()-gated PR step runs
22 |         run: |
23 |           cd tree-sitter
24 |           git fetch
25 |           git checkout release-0.25
26 |           cd ..
27 |           git submodule update --remote
28 |           git diff --exit-code || { echo "Changes found"; exit 1; }
29 | 
30 |       - name: Commit new changes and create PR
31 |         if: ${{ failure() }}
32 |         env:
33 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
34 |         run: |
35 |           git config user.name "github-actions[bot]"
36 |           git config user.email "github-actions[bot]@users.noreply.github.com"
37 |           git branch -D auto-sync-tree-sitter || true
38 |           git push origin --delete auto-sync-tree-sitter || true
39 |           git checkout -b auto-sync-tree-sitter
40 |           git add src include tree-sitter
41 |           git commit -m "chore(auto-sync): update core tree-sitter library"
42 |           gh pr create --title "chore(auto-sync): update core tree-sitter library" --body "This PR was automatically generated by the GitHub Actions workflow to update the core tree-sitter library." --base master --head auto-sync-tree-sitter
43 | 


--------------------------------------------------------------------------------
/src/atomic.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_ATOMIC_H_
 2 | #define TREE_SITTER_ATOMIC_H_
 3 | 
 4 | #include <stddef.h>
 5 | #include <stdint.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #ifdef __TINYC__
 9 | 
10 | static inline size_t atomic_load(const volatile size_t *p) {
11 |   return *p;  // TinyCC build: a plain volatile read; no atomic intrinsic is used
12 | }
13 | 
14 | static inline uint32_t atomic_inc(volatile uint32_t *p) {
15 |   *p += 1;  // TinyCC build: plain read-modify-write; nothing here makes it atomic
16 |   return *p;  // re-reads the counter after the increment
17 | }
18 | 
19 | static inline uint32_t atomic_dec(volatile uint32_t *p) {
20 |   *p-= 1;  // TinyCC build: plain read-modify-write; nothing here makes it atomic
21 |   return *p;  // re-reads the counter after the decrement
22 | }
23 | 
24 | #elif defined(_WIN32)
25 | 
26 | #include <windows.h>
27 | 
28 | static inline size_t atomic_load(const volatile size_t *p) {
29 |   return *p;  // Windows build: a plain volatile read
30 | }
31 | 
32 | static inline uint32_t atomic_inc(volatile uint32_t *p) {
33 |   return InterlockedIncrement((long volatile *)p);  // Windows build: returns InterlockedIncrement's result, converted to uint32_t
34 | }
35 | 
36 | static inline uint32_t atomic_dec(volatile uint32_t *p) {
37 |   return InterlockedDecrement((long volatile *)p);  // Windows build: returns InterlockedDecrement's result, converted to uint32_t
38 | }
39 | 
40 | #else
41 | 
42 | static inline size_t atomic_load(const volatile size_t *p) {
43 | #ifdef __ATOMIC_RELAXED
44 |   return __atomic_load_n(p, __ATOMIC_RELAXED);  // relaxed-order atomic load when the __atomic builtins exist
45 | #else
46 |   return __sync_fetch_and_add((volatile size_t *)p, 0);  // legacy builtin: adding 0 yields the current value
47 | #endif
48 | }
49 | 
50 | static inline uint32_t atomic_inc(volatile uint32_t *p) {
51 |   #ifdef __ATOMIC_RELAXED
52 |     return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);  // sequentially consistent add; yields the value after the add
53 |   #else
54 |     return __sync_add_and_fetch(p, 1U);  // legacy builtin with the same add-then-fetch result
55 |   #endif
56 | }
57 | 
58 | static inline uint32_t atomic_dec(volatile uint32_t *p) {
59 |   #ifdef __ATOMIC_RELAXED
60 |     return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);  // sequentially consistent subtract; yields the value after the subtract
61 |   #else
62 |     return __sync_sub_and_fetch(p, 1U);  // legacy builtin with the same subtract-then-fetch result
63 |   #endif
64 | }
65 | 
66 | #endif
67 | 
68 | #endif  // TREE_SITTER_ATOMIC_H_
69 | 


--------------------------------------------------------------------------------
/src/alloc.c:
--------------------------------------------------------------------------------
 1 | #include "alloc.h"
 2 | #include "tree_sitter/api.h"
 3 | #include <stdlib.h>
 4 | 
 5 | static void *ts_malloc_default(size_t size) {
 6 |   // Default allocator: malloc, except that a failed non-empty request is fatal.
 7 |   void *memory = malloc(size);
 8 |   if (memory != NULL || size == 0) return memory;
 9 | 
10 |   fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
11 |   abort();
12 | }
13 | 
14 | static void *ts_calloc_default(size_t count, size_t size) {
15 |   void *result = calloc(count, size);
16 |   if (!result && count > 0 && size > 0) {  // NULL is only a failure for a non-empty request; calloc(n, 0) may return NULL
17 |     fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
18 |     abort();
19 |   }
20 |   return result;  // zero-initialized block, or possibly NULL when count or size is 0
21 | }
22 | 
23 | static void *ts_realloc_default(void *buffer, size_t size) {
24 |   // Default reallocator: realloc, except that a failed non-empty request is fatal.
25 |   void *memory = realloc(buffer, size);
26 |   if (memory != NULL || size == 0) return memory;
27 | 
28 |   fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
29 |   abort();
30 | }
31 | 
32 | // Allow clients to override allocation functions dynamically
33 | TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;  // each pointer starts at its aborting default wrapper
34 | TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
35 | TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
36 | TS_PUBLIC void (*ts_current_free)(void *) = free;  // the C library's free is used directly
37 | 
38 | void ts_set_allocator(
39 |   void *(*new_malloc)(size_t size),
40 |   void *(*new_calloc)(size_t count, size_t size),
41 |   void *(*new_realloc)(void *ptr, size_t size),
42 |   void (*new_free)(void *ptr)
43 | ) {  // a NULL argument selects the built-in default for that slot
44 |   ts_current_malloc = (new_malloc == NULL) ? ts_malloc_default : new_malloc;
45 |   ts_current_calloc = (new_calloc == NULL) ? ts_calloc_default : new_calloc;
46 |   ts_current_realloc = (new_realloc == NULL) ? ts_realloc_default : new_realloc;
47 |   ts_current_free = (new_free == NULL) ? free : new_free;
48 | }
49 | 


--------------------------------------------------------------------------------
/lookahead_iterator_test.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter_test
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | 	. "github.com/tree-sitter/go-tree-sitter"
 8 | )
 9 | 
10 | func TestLookaheadIterator(t *testing.T) {
11 | 	parser := NewParser()
12 | 	defer parser.Close()
13 | 	language := getLanguage("rust")
14 | 	parser.SetLanguage(language)
15 | 	// Parse a tiny Rust snippet and walk down to its `struct` keyword.
16 | 	tree := parser.Parse([]byte("struct Stuff {}"), nil)
17 | 	assert.NotNil(t, tree)
18 | 	defer tree.Close()
19 | 
20 | 	cursor := tree.Walk()
21 | 	defer cursor.Close() // a TreeCursor owns C memory and must be closed explicitly
22 | 	assert.True(t, cursor.GotoFirstChild()) // struct
23 | 	assert.True(t, cursor.GotoFirstChild()) // struct keyword
24 | 
25 | 	nextState := cursor.Node().NextParseState()
26 | 	assert.NotEqual(t, uint16(0), nextState) // typed zero: an untyped 0 is an int and never equals a uint16
27 | 	assert.Equal(t, nextState, language.NextState(cursor.Node().ParseState(), cursor.Node().GrammarId()))
28 | 	assert.True(t, uint(nextState) < uint(language.ParseStateCount()))
29 | 	assert.True(t, cursor.GotoNextSibling()) // type_identifier
30 | 	assert.Equal(t, nextState, cursor.Node().ParseState())
31 | 	assert.Equal(t, "identifier", cursor.Node().GrammarName())
32 | 	assert.NotEqual(t, cursor.Node().GrammarId(), cursor.Node().KindId())
33 | 
34 | 	expectedSymbols := []string{"//", "/*", "identifier", "line_comment", "block_comment"}
35 | 	lookahead := language.LookaheadIterator(nextState)
36 | 	assert.NotNil(t, lookahead)
37 | 	defer lookahead.Close()
38 | 	assert.Equal(t, language, lookahead.Language())
39 | 	assert.Equal(t, expectedSymbols, lookahead.IterNames())
40 | 
41 | 	lookahead.ResetState(nextState) // rewind so the same symbols can be iterated again
42 | 	assert.Equal(t, expectedSymbols, lookahead.IterNames())
43 | 
44 | 	lookahead.Reset(language, nextState)
45 | 	var names []string
46 | 	symbols := lookahead.Iter()
47 | 	for _, s := range symbols {
48 | 		names = append(names, language.NodeKindForId(s))
49 | 	}
50 | 	assert.Equal(t, expectedSymbols, names)
51 | }
52 | 


--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
 1 | {
 2 |   description = "Go bindings for the Tree-sitter parsing library";
 3 | 
 4 |   inputs = {
 5 |     nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
 6 | 
 7 |     self.submodules = true;
 8 |   };
 9 | 
10 |   outputs =
11 |     inputs:
12 |     let
13 |       inherit (inputs.nixpkgs) lib;
14 |       inherit (inputs) self;
15 |       systems = [
16 |         "x86_64-linux"
17 |         "aarch64-linux"
18 |         "x86_64-darwin"
19 |         "aarch64-darwin"
20 |       ];
21 |       eachSystem = lib.genAttrs systems;
22 |       pkgsFor = inputs.nixpkgs.legacyPackages;
23 |     in
24 |     {
25 |       packages = eachSystem (
26 |         system:
27 |         let
28 |           pkgs = pkgsFor.${system};
29 |           inherit (pkgs) lib;
30 |         in
31 |         {
32 |           default = pkgs.buildGoModule {
33 |             pname = "go-tree-sitter";
34 |             version = "0.25.1";
35 | 
36 |             src = self;
37 | 
38 |             vendorHash = "sha256-6rj6oNohxBQt0LhIaHh3fQKHbNCsLsBkuPYNquHEVzE=";
39 |             proxyVendor = true;
40 | 
41 |             subPackages = [ "." ];
42 | 
43 |             meta = {
44 |               description = "Go bindings for the Tree-sitter parsing library";
45 |               homepage = "https://github.com/tree-sitter/go-tree-sitter";
46 |               license = lib.licenses.mit;
47 |               maintainers = [ lib.maintainers.amaanq ];
48 |             };
49 |           };
50 |         }
51 |       );
52 | 
53 |       devShells = eachSystem (
54 |         system:
55 |         let
56 |           pkgs = pkgsFor.${system};
57 |         in
58 |         {
59 |           default = pkgs.mkShell {
60 |             buildInputs = [
61 |               pkgs.go
62 |               pkgs.gopls
63 |             ];
64 |           };
65 |         }
66 |       );
67 | 
68 |       checks = eachSystem (system: {
69 |         inherit (self.packages.${system}) default;
70 |       });
71 |     };
72 | }
73 | 


--------------------------------------------------------------------------------
/src/unicode.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREE_SITTER_UNICODE_H_
 2 | #define TREE_SITTER_UNICODE_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <limits.h>
 9 | #include <stdint.h>
10 | 
11 | #define U_EXPORT
12 | #define U_EXPORT2
13 | #include "unicode/utf8.h"
14 | #include "unicode/utf16.h"
15 | #include "portable/endian.h"
16 | 
17 | #define U16_NEXT_LE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { /* read one UTF-16LE code point from s at index i into c */ \
18 |     (c)=le16toh((s)[(i)++]); \
19 |     if(U16_IS_LEAD(c)) { \
20 |         uint16_t __c2; \
21 |         if((i)!=(length) && U16_IS_TRAIL(__c2=le16toh((s)[(i)]))) { /* the trail unit needs the same byte-order conversion as the lead */ \
22 |             ++(i); \
23 |             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
24 |         } \
25 |     } \
26 | } UPRV_BLOCK_MACRO_END
27 | 
28 | #define U16_NEXT_BE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { /* read one UTF-16BE code point from s at index i into c */ \
29 |     (c)=be16toh((s)[(i)++]); \
30 |     if(U16_IS_LEAD(c)) { \
31 |         uint16_t __c2; \
32 |         if((i)!=(length) && U16_IS_TRAIL(__c2=be16toh((s)[(i)]))) { /* the trail unit needs the same byte-order conversion as the lead */ \
33 |             ++(i); \
34 |             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
35 |         } \
36 |     } \
37 | } UPRV_BLOCK_MACRO_END
38 | 
39 | static const int32_t TS_DECODE_ERROR = U_SENTINEL;  // alias for ICU's U_SENTINEL; presumably what the decoders store for invalid input — confirm
40 | 
41 | static inline uint32_t ts_decode_utf8(
42 |   const uint8_t *string,
43 |   uint32_t length,
44 |   int32_t *code_point
45 | ) {
46 |   uint32_t consumed = 0;  // index that U8_NEXT advances as it reads
47 |   U8_NEXT(string, consumed, length, *code_point);
48 |   return consumed;  // the index after decoding, in bytes
49 | }
50 | 
51 | static inline uint32_t ts_decode_utf16_le(
52 |   const uint8_t *string,
53 |   uint32_t length,
54 |   int32_t *code_point
55 | ) {
56 |   uint32_t units = 0;  // index in 16-bit code units, advanced by the macro
57 |   U16_NEXT_LE(((uint16_t *)string), units, length, *code_point);
58 |   return units * 2;  // two bytes per code unit
59 | }
60 | 
61 | static inline uint32_t ts_decode_utf16_be(
62 |   const uint8_t *string,
63 |   uint32_t length,
64 |   int32_t *code_point
65 | ) {
66 |   uint32_t units = 0;  // index in 16-bit code units, advanced by the macro
67 |   U16_NEXT_BE(((uint16_t *)string), units, length, *code_point);
68 |   return units * 2;  // two bytes per code unit
69 | }
70 | 
71 | #ifdef __cplusplus
72 | }
73 | #endif
74 | 
75 | #endif  // TREE_SITTER_UNICODE_H_
76 | 


--------------------------------------------------------------------------------
/lookahead_iterator.go:
--------------------------------------------------------------------------------
 1 | package tree_sitter
 2 | 
 3 | /*
 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
 5 | #include <tree_sitter/api.h>
 6 | */
 7 | import "C"
 8 | 
 9 | import (
10 | 	"unsafe"
11 | )
12 | 
13 | type LookaheadIterator struct { // LookaheadIterator wraps a C TSLookaheadIterator pointer.
14 | 	_inner *C.TSLookaheadIterator // handle passed to every ts_lookahead_iterator_* call
15 | }
16 | 
17 | func newLookaheadIterator(ptr *C.TSLookaheadIterator) *LookaheadIterator { // wraps ptr without copying or validating it
18 | 	return &LookaheadIterator{_inner: ptr}
19 | }
20 | 
21 | func (l *LookaheadIterator) Close() { // Close deletes the underlying C iterator.
22 | 	C.ts_lookahead_iterator_delete(l._inner) // _inner is not cleared afterwards, so Close must not be called twice
23 | }
24 | 
25 | func (l *LookaheadIterator) Language() *Language { // Language returns the iterator's language as a newly wrapped *Language.
26 | 	return NewLanguage(unsafe.Pointer(C.ts_lookahead_iterator_language(l._inner)))
27 | }
28 | 
29 | // Symbol returns the current symbol of the lookahead iterator.
30 | func (l *LookaheadIterator) Symbol() uint16 {
31 | 	return uint16(C.ts_lookahead_iterator_current_symbol(l._inner))
32 | }
33 | 
34 | // SymbolName returns the name of the lookahead iterator's current symbol as a Go string.
35 | func (l *LookaheadIterator) SymbolName() string {
36 | 	return C.GoString(C.ts_lookahead_iterator_current_symbol_name(l._inner))
37 | }
38 | 
39 | // Reset re-targets the lookahead iterator at the given language and state.
40 | //
41 | // It returns the boolean result of ts_lookahead_iterator_reset: `true` if
42 | // the language was set successfully and `false` otherwise.
43 | func (l *LookaheadIterator) Reset(language *Language, state uint16) bool {
44 | 	return bool(C.ts_lookahead_iterator_reset(l._inner, language.Inner, C.TSStateId(state)))
45 | }
46 | 
47 | // ResetState resets the lookahead iterator to another state of its language.
48 | //
49 | // It returns the boolean result of ts_lookahead_iterator_reset_state: `true`
50 | // if the iterator was reset to the given state and `false` otherwise.
51 | func (l *LookaheadIterator) ResetState(state uint16) bool {
52 | 	return bool(C.ts_lookahead_iterator_reset_state(l._inner, C.TSStateId(state)))
53 | }
54 | 
55 | // Iter advances the iterator until it is exhausted and returns every symbol it yields.
56 | func (l *LookaheadIterator) Iter() []uint16 {
57 | 	var collected []uint16
58 | 	for C.ts_lookahead_iterator_next(l._inner) {
59 | 		collected = append(collected, uint16(C.ts_lookahead_iterator_current_symbol(l._inner)))
60 | 	}
61 | 	return collected
62 | }
63 | 
64 | // IterNames advances the iterator until it is exhausted and returns the name of every symbol it yields.
65 | func (l *LookaheadIterator) IterNames() []string {
66 | 	var collected []string
67 | 	for C.ts_lookahead_iterator_next(l._inner) {
68 | 		collected = append(collected, C.GoString(C.ts_lookahead_iterator_current_symbol_name(l._inner)))
69 | 	}
70 | 	return collected
71 | }
72 | 


--------------------------------------------------------------------------------
/src/reusable_node.h:
--------------------------------------------------------------------------------
 1 | #include "./subtree.h"
 2 | 
 3 | typedef struct {  // One level of the path from the root down to the current node.
 4 |   Subtree tree;  // the subtree at this level
 5 |   uint32_t child_index;  // position of `tree` within its parent's children
 6 |   uint32_t byte_offset;  // byte offset at which `tree` starts
 7 | } StackEntry;
 8 | 
 9 | typedef struct {  // Cursor over an existing tree; the top of `stack` is the current node.
10 |   Array(StackEntry) stack;  // path of entries from the root to the current node
11 |   Subtree last_external_token;  // updated by reusable_node_advance when the node it leaves has external tokens
12 | } ReusableNode;
13 | 
14 | static inline ReusableNode reusable_node_new(void) {
15 |   return (ReusableNode) {.stack = array_new(), .last_external_token = NULL_SUBTREE};  // empty stack, no external token
16 | }
17 | 
18 | static inline void reusable_node_clear(ReusableNode *self) {
19 |   self->last_external_token = NULL_SUBTREE;  // forget the remembered external token
20 |   array_clear(&self->stack);  // and empty the path stack
21 | }
22 | 
23 | static inline Subtree reusable_node_tree(ReusableNode *self) {
24 |   // The current node is the tree on top of the stack, if there is one.
25 |   if (self->stack.size == 0) return NULL_SUBTREE;
26 |   return self->stack.contents[self->stack.size - 1].tree;
27 | }
28 | 
29 | static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
30 |   // Byte offset of the top stack entry; UINT32_MAX when the stack is empty.
31 |   if (self->stack.size == 0) return UINT32_MAX;
32 |   return self->stack.contents[self->stack.size - 1].byte_offset;
33 | }
34 | 
35 | static inline void reusable_node_delete(ReusableNode *self) {
36 |   array_delete(&self->stack);  // only the stack array is released; last_external_token is left untouched
37 | }
38 | 
39 | static inline void reusable_node_advance(ReusableNode *self) {
40 |   StackEntry last_entry = *array_back(&self->stack);  // copy the top entry before the stack is modified
41 |   uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);  // start offset plus the node's total bytes
42 |   if (ts_subtree_has_external_tokens(last_entry.tree)) {
43 |     self->last_external_token = ts_subtree_last_external_token(last_entry.tree);  // remember it before leaving the node
44 |   }
45 | 
46 |   Subtree tree;
47 |   uint32_t next_index;
48 |   do {  // pop until the new top still has a child after the one just left
49 |     StackEntry popped_entry = array_pop(&self->stack);
50 |     next_index = popped_entry.child_index + 1;
51 |     if (self->stack.size == 0) return;  // no ancestors remain: the stack is left empty
52 |     tree = array_back(&self->stack)->tree;
53 |   } while (ts_subtree_child_count(tree) <= next_index);
54 | 
55 |   array_push(&self->stack, ((StackEntry) {  // step onto that next child at the computed offset
56 |     .tree = ts_subtree_children(tree)[next_index],
57 |     .child_index = next_index,
58 |     .byte_offset = byte_offset,
59 |   }));
60 | }
61 | 
62 | static inline bool reusable_node_descend(ReusableNode *self) {
63 |   // Step into the first child of the node on top of the stack. Returns
64 |   // false, leaving the stack unchanged, when that node has no children.
65 |   StackEntry top = *array_back(&self->stack);
66 |   if (ts_subtree_child_count(top.tree) == 0) return false;
67 |   StackEntry first_child = {
68 |     .tree = ts_subtree_children(top.tree)[0],
69 |     .child_index = 0,
70 |     .byte_offset = top.byte_offset,
71 |   };
72 |   array_push(&self->stack, first_child);
73 |   return true;
74 | }
75 | 
76 | static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
77 |   while (reusable_node_descend(self)) continue;  // descend until the top node has no children
78 |   reusable_node_advance(self);  // then move past that node
79 | }
80 | 
81 | static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
82 |   // Restart the walk with `tree` as the root, at byte offset zero.
83 |   StackEntry root_entry = {.tree = tree, .child_index = 0, .byte_offset = 0};
84 |   reusable_node_clear(self);
85 |   array_push(&self->stack, root_entry);
86 | 
87 |   // The root itself is never offered for reuse: accepting a parse reshapes
88 |   // it (an EOF child and any extra children are added), so its internal
89 |   // structure is non-standard. Start from its first child instead, and
90 |   // leave the node empty when the root has no children.
91 |   bool has_children = reusable_node_descend(self);
92 |   if (!has_children) {
93 |     reusable_node_clear(self);
94 |   }
95 | }
96 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Go Tree-sitter
 2 | 
 3 | [![CI][ci]](https://github.com/tree-sitter/go-tree-sitter/actions/workflows/ci.yml)
 4 | [![Go version][go version]](https://github.com/tree-sitter/go-tree-sitter/blob/master/go.mod)
 5 | [![Version][version]](https://github.com/tree-sitter/go-tree-sitter/tags)
 6 | [![Docs][docs]](https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter)
 7 | 
 8 | This repository contains Go bindings for the [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) parsing library.
 9 | 
10 | To use this in your Go project, run:
11 | 
12 | ```sh
13 | go get github.com/tree-sitter/go-tree-sitter@latest
14 | ```
15 | 
16 | Example usage:
17 | 
18 | ```go
19 | package main
20 | 
21 | import (
22 |     "fmt"
23 | 
24 |     tree_sitter "github.com/tree-sitter/go-tree-sitter"
25 |     tree_sitter_javascript "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
26 | )
27 | 
28 | func main() {
29 |     code := []byte("const foo = 1 + 2")
30 | 
31 |     parser := tree_sitter.NewParser()
32 |     defer parser.Close()
33 |     parser.SetLanguage(tree_sitter.NewLanguage(tree_sitter_javascript.Language()))
34 | 
35 |     tree := parser.Parse(code, nil)
36 |     defer tree.Close()
37 | 
38 |     root := tree.RootNode()
39 |     fmt.Println(root.ToSexp())
40 | }
41 | ```
42 | 
43 | By default, none of the grammars are included in this package.
44 | This way, you bring in only what you need, at the slight cost of having to call `go get` once per grammar.
45 | 
46 | In the example above, to fetch the JavaScript grammar, you can run the following:
47 | 
48 | ```sh
49 | go get github.com/tree-sitter/tree-sitter-javascript@latest
50 | ```
51 | 
52 | Alternatively, you can load grammars at runtime from a shared library via [purego](https://github.com/ebitengine/purego).
53 | 
54 | The example below shows how to load the JavaScript grammar from a shared library (`libtree-sitter-PARSER_NAME.so`) at runtime on Linux & macOS:
55 | 
56 | For more information on other platforms, see the [purego documentation](https://github.com/ebitengine/purego#supported-platforms).
57 | 
58 | ```go
59 | package main
60 | 
61 | import (
62 | 	"unsafe"
63 | 
64 | 	"github.com/ebitengine/purego"
65 | 	tree_sitter "github.com/tree-sitter/go-tree-sitter"
66 | )
67 | 
68 | func main() {
69 | 	path := "/path/to/your/parser.so"
70 | 	lib, err := purego.Dlopen(path, purego.RTLD_NOW|purego.RTLD_GLOBAL)
71 | 	if err != nil {
72 | 		// handle error
73 | 	}
74 | 	var javascriptLanguage func() uintptr
75 | 	purego.RegisterLibFunc(&javascriptLanguage, lib, "tree_sitter_javascript")
76 | 	language := tree_sitter.NewLanguage(unsafe.Pointer(javascriptLanguage()))
77 | }
78 | ```
79 | 
80 | > [!NOTE]
81 | > Due to [bugs with `runtime.SetFinalizer` and CGO](https://groups.google.com/g/golang-nuts/c/LIWj6Gl--es), you must always call `Close`
82 | > on an object that allocates memory from C. This must be done for the `Parser`, `Tree`, `TreeCursor`, `Query`, `QueryCursor`, and `LookaheadIterator` objects.
83 | 
84 | For more information, see the [documentation](https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter).
85 | 
86 | [ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/go-tree-sitter/ci.yml?logo=github&label=CI
87 | [go version]: https://img.shields.io/github/go-mod/go-version/tree-sitter/go-tree-sitter
88 | [version]: https://img.shields.io/github/v/tag/tree-sitter/go-tree-sitter?label=version
89 | [docs]: https://pkg.go.dev/badge/github.com/tree-sitter/go-tree-sitter.svg?style=flat-square
90 | 


--------------------------------------------------------------------------------
/src/wasm/stdlib.c:
--------------------------------------------------------------------------------
  1 | // This file implements a very simple allocator for external scanners running
  2 | // in WASM. Allocation is just bumping a static pointer and growing the heap
  3 | // as needed, and freeing is mostly a noop. But in the special case of freeing
  4 | // the last-allocated pointer, we'll reuse that pointer again.
  5 | 
  6 | #ifdef TREE_SITTER_FEATURE_WASM
  7 | 
  8 | #include <stdio.h>
  9 | #include <unistd.h>
 10 | #include <stdlib.h>
 11 | #include <string.h>
 12 | 
 13 | extern void tree_sitter_debug_message(const char *, size_t);
 14 | 
 15 | #define PAGESIZE 0x10000
 16 | #define MAX_HEAP_SIZE (4 * 1024 * 1024)
 17 | 
 18 | typedef struct {  // header stored immediately before each allocation's bytes
 19 |   size_t size;  // requested size of the allocation, as recorded by malloc
 20 |   char data[0];  // zero-length member marking where the caller's bytes begin
 21 | } Region;
 22 | 
 23 | static Region *heap_end = NULL;  // end of wasm memory as last sampled through get_heap_end()
 24 | static Region *heap_start = NULL;  // base address supplied to reset_heap()
 25 | static Region *next = NULL;  // where the next allocation's header will be written
 26 | 
 27 | // Step back from a heap pointer to the Region header stored just before it.
 28 | static inline Region *region_for_ptr(void *ptr) {
 29 |   return (Region *)((char *)ptr - sizeof(Region));
 30 | }
 31 | 
 32 | // Compute where the region following `self` would start if `self` held
 33 | // `len` bytes: the end of its data, rounded up to a multiple of four.
 34 | static inline Region *region_after(Region *self, size_t len) {
 35 |   uintptr_t data_end = (uintptr_t)(self->data + len);
 36 |   uintptr_t rounded_up = (data_end + 3) & ~(uintptr_t)0x3;
 37 |   return (Region *)rounded_up;
 38 | }
 39 | 
 40 | static void *get_heap_end() {
 41 |   return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE);  // page count of memory 0 times the 64 KiB page size
 42 | }
 43 | 
 44 | static int grow_heap(size_t size) {  // returns nonzero when memory 0 was grown
 45 |   size_t new_page_count = ((size - 1) / PAGESIZE) + 1;  // pages needed to cover `size` bytes; NOTE(review): size == 0 wraps to a huge count
 46 |   return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX;  // a SIZE_MAX result is treated as failure
 47 | }
 48 | 
 49 | // Discard every allocation and restart the heap at the given address.
 50 | void reset_heap(void *new_heap_start) {
 51 |   heap_end = get_heap_end();
 52 |   next = new_heap_start;
 53 |   heap_start = new_heap_start;
 54 | }
 55 | 
 56 | void *malloc(size_t size) {  // bump-allocates `size` bytes at `next`; NULL when the heap cannot grow
 57 |   Region *region_end = region_after(next, size);
 58 | 
 59 |   if (region_end > heap_end) {
 60 |     if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) return NULL;
 61 |     // Grow by the actual shortfall, which includes the Region header and
 62 |     // alignment padding; growing by `size` alone can leave the region's tail past the heap end.
 63 |     if (!grow_heap((size_t)((char *)region_end - (char *)heap_end))) return NULL;
 64 |     heap_end = get_heap_end();
 65 |   }
 66 | 
 67 |   void *result = &next->data;
 68 |   next->size = size;  // record the size so free/realloc can find the region's end
 69 |   next = region_end;
 70 | 
 71 |   return result;
 72 | }
 73 | 
 74 | void free(void *ptr) {
 75 |   // Freeing NULL does nothing. Otherwise only the most recently allocated
 76 |   // region can be reclaimed: if this region ends exactly at `next`, rewind
 77 |   // `next` so the space is reused by the following allocation. Every other
 78 |   // free is a no-op.
 79 |   if (ptr != NULL) {
 80 |     Region *freed = region_for_ptr(ptr);
 81 |     if (region_after(freed, freed->size) == next) {
 82 |       next = freed;
 83 |     }
 84 |   }
 85 | }
 86 | 
 87 | void *calloc(size_t count, size_t size) {
 88 |   if (size != 0 && count > SIZE_MAX / size) return NULL;  // count * size would overflow
 89 |   void *result = malloc(count * size);
 90 |   return result ? memset(result, 0, count * size) : NULL;  // never memset a failed allocation
 91 | }
 92 | 
 93 | void *realloc(void *ptr, size_t new_size) {
 94 |   if (ptr == NULL) return malloc(new_size);
 95 |   Region *region = region_for_ptr(ptr);
 96 |   Region *region_end = region_after(region, region->size);
 97 | 
 98 |   // When reallocating the last allocated region, return the same pointer
 99 |   // and skip copying the data. If the resize fails, restore `next` so the
100 |   // original allocation stays valid, as realloc's contract requires.
101 |   if (region_end == next) {
102 |     next = region;
103 |     void *resized = malloc(new_size);
104 |     if (resized == NULL) next = region_end;
105 |     return resized;
106 |   }
107 | 
108 |   void *result = malloc(new_size);  // any other region moves to a fresh block; the old one is not reclaimed
109 |   if (result != NULL) memcpy(result, &region->data, new_size < region->size ? new_size : region->size);
110 |   return result;
111 | }
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/mattn/go-pointer v0.0.1 h1:n+XhsuGeVO6MEAp7xyEukFINEa+Quek5psIR/ylA6o0=
 4 | github.com/mattn/go-pointer v0.0.1/go.mod h1:2zXcozF6qYGgmsG+SeTZz3oAbFLdD3OWqnUbNvJZAlc=
 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 9 | github.com/tree-sitter/tree-sitter-c v0.23.4 h1:nBPH3FV07DzAD7p0GfNvXM+Y7pNIoPenQWBpvM++t4c=
10 | github.com/tree-sitter/tree-sitter-c v0.23.4/go.mod h1:MkI5dOiIpeN94LNjeCp8ljXN/953JCwAby4bClMr6bw=
11 | github.com/tree-sitter/tree-sitter-cpp v0.23.4 h1:LaWZsiqQKvR65yHgKmnaqA+uz6tlDJTJFCyFIeZU/8w=
12 | github.com/tree-sitter/tree-sitter-cpp v0.23.4/go.mod h1:doqNW64BriC7WBCQ1klf0KmJpdEvfxyXtoEybnBo6v8=
13 | github.com/tree-sitter/tree-sitter-embedded-template v0.23.2 h1:nFkkH6Sbe56EXLmZBqHHcamTpmz3TId97I16EnGy4rg=
14 | github.com/tree-sitter/tree-sitter-embedded-template v0.23.2/go.mod h1:HNPOhN0qF3hWluYLdxWs5WbzP/iE4aaRVPMsdxuzIaQ=
15 | github.com/tree-sitter/tree-sitter-go v0.23.4 h1:yt5KMGnTHS+86pJmLIAZMWxukr8W7Ae1STPvQUuNROA=
16 | github.com/tree-sitter/tree-sitter-go v0.23.4/go.mod h1:Jrx8QqYN0v7npv1fJRH1AznddllYiCMUChtVjxPK040=
17 | github.com/tree-sitter/tree-sitter-html v0.23.2 h1:1UYDV+Yd05GGRhVnTcbP58GkKLSHHZwVaN+lBZV11Lc=
18 | github.com/tree-sitter/tree-sitter-html v0.23.2/go.mod h1:gpUv/dG3Xl/eebqgeYeFMt+JLOY9cgFinb/Nw08a9og=
19 | github.com/tree-sitter/tree-sitter-java v0.23.5 h1:J9YeMGMwXYlKSP3K4Us8CitC6hjtMjqpeOf2GGo6tig=
20 | github.com/tree-sitter/tree-sitter-java v0.23.5/go.mod h1:NRKlI8+EznxA7t1Yt3xtraPk1Wzqh3GAIC46wxvc320=
21 | github.com/tree-sitter/tree-sitter-javascript v0.23.1 h1:1fWupaRC0ArlHJ/QJzsfQ3Ibyopw7ZfQK4xXc40Zveo=
22 | github.com/tree-sitter/tree-sitter-javascript v0.23.1/go.mod h1:lmGD1EJdCA+v0S1u2fFgepMg/opzSg/4pgFym2FPGAs=
23 | github.com/tree-sitter/tree-sitter-json v0.24.8 h1:tV5rMkihgtiOe14a9LHfDY5kzTl5GNUYe6carZBn0fQ=
24 | github.com/tree-sitter/tree-sitter-json v0.24.8/go.mod h1:F351KK0KGvCaYbZ5zxwx/gWWvZhIDl0eMtn+1r+gQbo=
25 | github.com/tree-sitter/tree-sitter-php v0.23.11 h1:iHewsLNDmznh8kgGyfWfujsZxIz1YGbSd2ZTEM0ZiP8=
26 | github.com/tree-sitter/tree-sitter-php v0.23.11/go.mod h1:T/kbfi+UcCywQfUNAJnGTN/fMSUjnwPXA8k4yoIks74=
27 | github.com/tree-sitter/tree-sitter-python v0.23.6 h1:qHnWFR5WhtMQpxBZRwiaU5Hk/29vGju6CVtmvu5Haas=
28 | github.com/tree-sitter/tree-sitter-python v0.23.6/go.mod h1:cpdthSy/Yoa28aJFBscFHlGiU+cnSiSh1kuDVtI8YeM=
29 | github.com/tree-sitter/tree-sitter-ruby v0.23.1 h1:T/NKHUA+iVbHM440hFx+lzVOzS4dV6z8Qw8ai+72bYo=
30 | github.com/tree-sitter/tree-sitter-ruby v0.23.1/go.mod h1:kUS4kCCQloFcdX6sdpr8p6r2rogbM6ZjTox5ZOQy8cA=
31 | github.com/tree-sitter/tree-sitter-rust v0.23.2 h1:6AtoooCW5GqNrRpfnvl0iUhxTAZEovEmLKDbyHlfw90=
32 | github.com/tree-sitter/tree-sitter-rust v0.23.2/go.mod h1:hfeGWic9BAfgTrc7Xf6FaOAguCFJRo3RBbs7QJ6D7MI=
33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
34 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
35 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
36 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
37 | 


--------------------------------------------------------------------------------
/allocator.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter
  2 | 
  3 | /*
  4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
  5 | #include <tree_sitter/api.h>
  6 | #include "allocator.h"
  7 | */
  8 | import "C"
  9 | 
 10 | import (
 11 | 	"sync/atomic"
 12 | 	"unsafe"
 13 | )
 14 | 
 15 | var (
 16 | 	malloc_fn  atomic.Value
 17 | 	calloc_fn  atomic.Value
 18 | 	realloc_fn atomic.Value
 19 | 	free_fn    atomic.Value
 20 | )
 21 | 
 22 | func init() { // Seed every slot with a typed nil so the type assertions in go_malloc and friends succeed before SetAllocator runs.
 23 | 	malloc_fn.Store((func(C.size_t) unsafe.Pointer)(nil))
 24 | 	calloc_fn.Store((func(C.size_t, C.size_t) unsafe.Pointer)(nil))
 25 | 	realloc_fn.Store((func(unsafe.Pointer, C.size_t) unsafe.Pointer)(nil))
 26 | 	free_fn.Store((func(unsafe.Pointer))(nil))
 27 | }
 28 | 
 29 | //export go_malloc
 30 | func go_malloc(size C.size_t) unsafe.Pointer {
 31 | 	if fn := malloc_fn.Load().(func(C.size_t) unsafe.Pointer); fn != nil {
 32 | 		return fn(size) // function stored by SetAllocator
 33 | 	}
 34 | 	return C.malloc(size) // nothing stored: use the C library's malloc
 35 | }
 36 | 
 37 | //export go_calloc
 38 | func go_calloc(num, size C.size_t) unsafe.Pointer {
 39 | 	if fn := calloc_fn.Load().(func(C.size_t, C.size_t) unsafe.Pointer); fn != nil {
 40 | 		return fn(num, size) // function stored by SetAllocator
 41 | 	}
 42 | 	return C.calloc(num, size) // nothing stored: use the C library's calloc
 43 | }
 44 | 
 45 | //export go_realloc
 46 | func go_realloc(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer {
 47 | 	if fn := realloc_fn.Load().(func(unsafe.Pointer, C.size_t) unsafe.Pointer); fn != nil {
 48 | 		return fn(ptr, size) // function stored by SetAllocator
 49 | 	}
 50 | 	return C.realloc(ptr, size) // nothing stored: use the C library's realloc
 51 | }
 52 | 
 53 | //export go_free
 54 | func go_free(ptr unsafe.Pointer) {
 55 | 	if fn := free_fn.Load().(func(unsafe.Pointer)); fn != nil {
 56 | 		fn(ptr) // function stored by SetAllocator
 57 | 		return
 58 | 	}
 59 | 	C.free(ptr) // nothing stored: use the C library's free
 60 | }
 61 | 
 62 | // SetAllocator sets the memory allocation functions that the core library should use.
 63 | func SetAllocator(
 64 | 	newMalloc func(size uint) unsafe.Pointer,
 65 | 	newCalloc func(num, size uint) unsafe.Pointer,
 66 | 	newRealloc func(ptr unsafe.Pointer, size uint) unsafe.Pointer,
 67 | 	newFree func(ptr unsafe.Pointer),
 68 | ) {
 69 | 	if newMalloc == nil && newCalloc == nil && newRealloc == nil && newFree == nil { // all four nil: clear the Go slots and reset the C library
 70 | 		malloc_fn.Store((func(C.size_t) unsafe.Pointer)(nil))
 71 | 		calloc_fn.Store((func(C.size_t, C.size_t) unsafe.Pointer)(nil))
 72 | 		realloc_fn.Store((func(unsafe.Pointer, C.size_t) unsafe.Pointer)(nil))
 73 | 		free_fn.Store((func(unsafe.Pointer))(nil))
 74 | 
 75 | 		C.ts_set_allocator(nil, nil, nil, nil)
 76 | 		return
 77 | 	}
 78 | 
 79 | 	if newMalloc != nil {
 80 | 		malloc_fn.Store(func(size C.size_t) unsafe.Pointer { // adapt the uint-based callback to the C.size_t signature
 81 | 			return newMalloc(uint(size))
 82 | 		})
 83 | 	} else {
 84 | 		malloc_fn.Store(func(size C.size_t) unsafe.Pointer { // no custom malloc given: store a wrapper around C.malloc
 85 | 			return C.malloc(size)
 86 | 		})
 87 | 	}
 88 | 
 89 | 	if newCalloc != nil {
 90 | 		calloc_fn.Store(func(num, size C.size_t) unsafe.Pointer {
 91 | 			return newCalloc(uint(num), uint(size))
 92 | 		})
 93 | 	} else {
 94 | 		calloc_fn.Store(func(num, size C.size_t) unsafe.Pointer {
 95 | 			return C.calloc(num, size)
 96 | 		})
 97 | 	}
 98 | 
 99 | 	if newRealloc != nil {
100 | 		realloc_fn.Store(func(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer {
101 | 			return newRealloc(ptr, uint(size))
102 | 		})
103 | 	} else {
104 | 		realloc_fn.Store(func(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer {
105 | 			return C.realloc(ptr, size)
106 | 		})
107 | 	}
108 | 
109 | 	if newFree != nil {
110 | 		free_fn.Store(func(ptr unsafe.Pointer) {
111 | 			newFree(ptr)
112 | 		})
113 | 	} else {
114 | 		free_fn.Store(func(ptr unsafe.Pointer) {
115 | 			C.free(ptr)
116 | 		})
117 | 	}
118 | 
119 | 	var cMalloc, cCalloc, cRealloc, cFree unsafe.Pointer // each stays nil unless the matching callback was supplied
120 | 	if newMalloc != nil {
121 | 		cMalloc = unsafe.Pointer(C.c_malloc_fn)
122 | 	}
123 | 	if newCalloc != nil {
124 | 		cCalloc = unsafe.Pointer(C.c_calloc_fn)
125 | 	}
126 | 	if newRealloc != nil {
127 | 		cRealloc = unsafe.Pointer(C.c_realloc_fn)
128 | 	}
129 | 	if newFree != nil {
130 | 		cFree = unsafe.Pointer(C.c_free_fn)
131 | 	}
132 | 
133 | 	C.ts_set_allocator( // runs after the Go slots above have been stored
134 | 		(*[0]byte)(cMalloc),
135 | 		(*[0]byte)(cCalloc),
136 | 		(*[0]byte)(cRealloc),
137 | 		(*[0]byte)(cFree),
138 | 	)
139 | }
140 | 


--------------------------------------------------------------------------------
/src/clock.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_CLOCK_H_
  2 | #define TREE_SITTER_CLOCK_H_
  3 | 
  4 | #include <stdbool.h>
  5 | #include <stdint.h>
  6 | 
  7 | typedef uint64_t TSDuration;
  8 | 
  9 | #ifdef _WIN32
 10 | 
 11 | // Windows:
 12 | // * Represent a time as a performance counter value.
 13 | // * Represent a duration as a number of performance counter ticks.
 14 | 
 15 | #include <windows.h>
 16 | typedef uint64_t TSClock;
 17 | 
 18 | static inline TSDuration duration_from_micros(uint64_t micros) {
 19 |   LARGE_INTEGER frequency;
 20 |   QueryPerformanceFrequency(&frequency);  // performance-counter ticks per second
 21 |   return micros * (uint64_t)frequency.QuadPart / 1000000;  // NOTE(review): multiplies before dividing, so a very large `micros` can wrap
 22 | }
 23 | 
 24 | static inline uint64_t duration_to_micros(TSDuration self) {
 25 |   LARGE_INTEGER frequency;
 26 |   QueryPerformanceFrequency(&frequency);  // performance-counter ticks per second
 27 |   return self * 1000000 / (uint64_t)frequency.QuadPart;  // NOTE(review): multiplies before dividing, so a very large tick count can wrap
 28 | }
 29 | 
 30 | static inline TSClock clock_null(void) {
 31 |   return 0;
 32 | }
 33 | 
 34 | static inline TSClock clock_now(void) {
 35 |   LARGE_INTEGER result;
 36 |   QueryPerformanceCounter(&result);
 37 |   return (uint64_t)result.QuadPart;
 38 | }
 39 | 
 40 | static inline TSClock clock_after(TSClock base, TSDuration duration) {
 41 |   return base + duration;
 42 | }
 43 | 
 44 | static inline bool clock_is_null(TSClock self) {
 45 |   return !self;
 46 | }
 47 | 
 48 | static inline bool clock_is_gt(TSClock self, TSClock other) {
 49 |   return self > other;
 50 | }
 51 | 
 52 | #elif defined(CLOCK_MONOTONIC)
 53 | 
 54 | // POSIX with monotonic clock support (Linux, macOS)
 55 | // * Represent a time as a monotonic (seconds, nanoseconds) pair.
 56 | // * Represent a duration as a number of microseconds.
 57 | //
 58 | // On these platforms, parse timeouts will correspond accurately to
 59 | // real time, regardless of what other processes are running.
 60 | 
 61 | #include <time.h>
 62 | typedef struct timespec TSClock;
 63 | 
 64 | static inline TSDuration duration_from_micros(uint64_t micros) {
 65 |   return micros;
 66 | }
 67 | 
 68 | static inline uint64_t duration_to_micros(TSDuration self) {
 69 |   return self;
 70 | }
 71 | 
 72 | static inline TSClock clock_now(void) {
 73 |   TSClock result;
 74 |   clock_gettime(CLOCK_MONOTONIC, &result);  // NOTE(review): return value is not checked; `result` is left uninitialized if the call fails
 75 |   return result;
 76 | }
 77 | 
 78 | static inline TSClock clock_null(void) {
 79 |   return (TSClock) {0, 0};
 80 | }
 81 | 
 82 | static inline TSClock clock_after(TSClock base, TSDuration duration) {
 83 |   TSClock result = base;
 84 |   result.tv_sec += duration / 1000000;  // whole seconds contained in the microsecond duration
 85 |   result.tv_nsec += (duration % 1000000) * 1000;  // leftover microseconds, converted to nanoseconds
 86 |   if (result.tv_nsec >= 1000000000) {  // carry into seconds; one carry suffices assuming base.tv_nsec was below one second
 87 |     result.tv_nsec -= 1000000000;
 88 |     ++(result.tv_sec);
 89 |   }
 90 |   return result;
 91 | }
 92 | 
 93 | static inline bool clock_is_null(TSClock self) {
 94 |   return !self.tv_sec && !self.tv_nsec;
 95 | }
 96 | 
 97 | static inline bool clock_is_gt(TSClock self, TSClock other) {
 98 |   if (self.tv_sec > other.tv_sec) return true;  // seconds decide first
 99 |   if (self.tv_sec < other.tv_sec) return false;
100 |   return self.tv_nsec > other.tv_nsec;  // equal seconds: compare nanoseconds (equal clocks yield false)
101 | }
102 | 
103 | #else
104 | 
105 | // POSIX without monotonic clock support
106 | // * Represent a time as a process clock value.
107 | // * Represent a duration as a number of process clock ticks.
108 | //
109 | // On these platforms, parse timeouts may be affected by other processes,
110 | // which is not ideal, but is better than using a non-monotonic time API
111 | // like `gettimeofday`.
112 | 
113 | #include <time.h>
114 | typedef uint64_t TSClock;
115 | 
116 | static inline TSDuration duration_from_micros(uint64_t micros) {
117 |   return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
118 | }
119 | 
120 | static inline uint64_t duration_to_micros(TSDuration self) {
121 |   return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
122 | }
123 | 
124 | static inline TSClock clock_null(void) {
125 |   return 0;
126 | }
127 | 
128 | static inline TSClock clock_now(void) {
129 |   return (uint64_t)clock();  // processor time used by the process; NOTE(review): clock() reports failure as (clock_t)-1, which is not checked here
130 | }
131 | 
132 | static inline TSClock clock_after(TSClock base, TSDuration duration) {
133 |   return base + duration;
134 | }
135 | 
136 | static inline bool clock_is_null(TSClock self) {
137 |   return !self;
138 | }
139 | 
140 | static inline bool clock_is_gt(TSClock self, TSClock other) {
141 |   return self > other;
142 | }
143 | 
144 | #endif
145 | 
146 | #endif  // TREE_SITTER_CLOCK_H_
147 | 


--------------------------------------------------------------------------------
/edit_test.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter_test
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math/rand"
  6 | 
  7 | 	. "github.com/tree-sitter/go-tree-sitter"
  8 | )
  9 | 
 10 | type testEdit struct {
 11 | 	insertedText  []byte // bytes written at position
 12 | 	position      uint   // byte offset at which the edit starts
 13 | 	deletedLength uint   // number of bytes removed starting at position
 14 | }
 15 | 
 16 | func performEdit(tree *Tree, input *[]byte, edit *testEdit) (InputEdit, error) { // applies edit to *input, reports it to tree, and returns the InputEdit used
 17 | 	startByte := edit.position
 18 | 	oldEndByte := edit.position + edit.deletedLength
 19 | 	newEndByte := edit.position + uint(len(edit.insertedText))
 20 | 
 21 | 	startPosition, err := positionForOffset(*input, startByte) // measured on the text before the edit
 22 | 	if err != nil {
 23 | 		return InputEdit{}, err
 24 | 	}
 25 | 
 26 | 	oldEndPosition, err := positionForOffset(*input, oldEndByte) // measured on the text before the edit
 27 | 	if err != nil {
 28 | 		return InputEdit{}, err
 29 | 	}
 30 | 
 31 | 	newInput := make([]byte, 0, len(*input)-int(edit.deletedLength)+len(edit.insertedText)) // capacity equals the length of the edited text
 32 | 	newInput = append(newInput, (*input)[:startByte]...)
 33 | 	newInput = append(newInput, edit.insertedText...)
 34 | 	newInput = append(newInput, (*input)[oldEndByte:]...)
 35 | 	*input = newInput // the caller's slice now refers to the edited text
 36 | 
 37 | 	newEndPosition, err := positionForOffset(*input, newEndByte) // measured on the text after the edit
 38 | 	if err != nil {
 39 | 		return InputEdit{}, err
 40 | 	}
 41 | 
 42 | 	inputEdit := InputEdit{
 43 | 		StartByte:      startByte,
 44 | 		OldEndByte:     oldEndByte,
 45 | 		NewEndByte:     newEndByte,
 46 | 		StartPosition:  startPosition,
 47 | 		OldEndPosition: oldEndPosition,
 48 | 		NewEndPosition: newEndPosition,
 49 | 	}
 50 | 	tree.Edit(&inputEdit)
 51 | 	return inputEdit, nil
 52 | }
 53 | 
 54 | // positionForOffset converts a byte offset into input to a row/column
 55 | // Point, where Column counts the bytes since the most recent newline. It
 56 | // returns an error when offset lies beyond the end of input.
 57 | func positionForOffset(input []byte, offset uint) (Point, error) {
 58 | 	if offset > uint(len(input)) {
 59 | 		return Point{}, fmt.Errorf("failed to address an offset: %d", offset)
 60 | 	}
 61 | 
 62 | 	// Walk the bytes before offset, counting rows and remembering the
 63 | 	// offset at which the current line begins.
 64 | 	var row, lineStart uint
 65 | 	for idx, b := range input[:offset] {
 66 | 		if b == '\n' {
 67 | 			row++
 68 | 			lineStart = uint(idx) + 1
 69 | 		}
 70 | 	}
 71 | 
 72 | 	var pos Point
 73 | 	pos.Row = row
 74 | 	pos.Column = offset - lineStart
 75 | 	return pos, nil
 76 | }
 77 | 
 78 | // invertEdit builds the edit that reverses edit: it deletes what edit inserts and re-inserts the bytes of input that edit deletes.
 79 | func invertEdit(input []byte, edit *testEdit) *testEdit {
 80 | 	start, end := edit.position, edit.position+edit.deletedLength
 81 | 	inverse := new(testEdit)
 82 | 	inverse.position = start
 83 | 	inverse.deletedLength = uint(len(edit.insertedText))
 84 | 	inverse.insertedText = input[start:end] // shares input's backing array rather than copying
 85 | 	return inverse
 86 | }
 87 | 
 88 | // getRandomEdit picks a random edit for input: an insertion at the end
 89 | // (20%), a deletion from the end (30%), an insertion at a random position
 90 | // (30%) or a replacement at a random position (20%). An empty input is
 91 | // edited at position 0 instead of letting rand.Intn(0) panic.
 92 | func getRandomEdit(rand *rand.Rand, input []byte) testEdit {
 93 | 	inputLen := len(input)
 94 | 
 95 | 	// randBelow is rand.Intn that yields 0 instead of panicking when n <= 0.
 96 | 	// For n > 0 it draws exactly one value, like the direct call would.
 97 | 	randBelow := func(n int) uint {
 98 | 		if n <= 0 {
 99 | 			return 0
100 | 		}
101 | 		return uint(rand.Intn(n))
102 | 	}
103 | 
104 | 	switch choice := rand.Intn(10); {
105 | 	case choice < 2:
106 | 		// Insert text at end
107 | 		return testEdit{
108 | 			position:     uint(inputLen),
109 | 			insertedText: randWords(rand, 3),
110 | 		}
111 | 	case choice < 5:
112 | 		// Delete text from the end
113 | 		deletedLength := uint(rand.Intn(30))
114 | 		if deletedLength > uint(inputLen) {
115 | 			deletedLength = uint(inputLen)
116 | 		}
117 | 		return testEdit{
118 | 			position:      uint(inputLen) - deletedLength,
119 | 			deletedLength: deletedLength,
120 | 			insertedText:  []byte{},
121 | 		}
122 | 	case choice < 8:
123 | 		// Insert at a random position
124 | 		position := randBelow(inputLen)
125 | 		wordCount := 1 + rand.Intn(3)
126 | 		return testEdit{
127 | 			position:     position,
128 | 			insertedText: randWords(rand, wordCount),
129 | 		}
130 | 	default:
131 | 		// Replace at random position
132 | 		position := randBelow(inputLen)
133 | 		deletedLength := randBelow(inputLen - int(position))
134 | 		wordCount := 1 + rand.Intn(3)
135 | 		return testEdit{
136 | 			position:      position,
137 | 			deletedLength: deletedLength,
138 | 			insertedText:  randWords(rand, wordCount),
139 | 		}
140 | 	}
141 | }
132 | 
133 | var operators = []byte{'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%'}
134 | 
135 | // randWords generates fewer than maxCount random words, separated by a
136 | // space or (one time in five) a newline. A word is either a single operator
137 | // or a run of 0-7 lowercase letters. maxCount must be positive because
138 | // rand.Intn panics otherwise.
139 | func randWords(rand *rand.Rand, maxCount int) []byte {
140 | 	var result []byte
141 | 	wordCount := rand.Intn(maxCount)
142 | 	for i := 0; i < wordCount; i++ {
143 | 		if i > 0 {
144 | 			if rand.Intn(5) == 0 {
145 | 				result = append(result, '\n')
146 | 			} else {
147 | 				result = append(result, ' ')
148 | 			}
149 | 		}
150 | 		if rand.Intn(3) == 0 {
151 | 			index := rand.Intn(len(operators))
152 | 			result = append(result, operators[index])
153 | 		} else {
154 | 			// Draw the word length once. Calling rand.Intn in the loop
155 | 			// condition re-rolls the bound on every iteration, which
156 | 			// skews words short and burns extra random values.
157 | 			letterCount := rand.Intn(8)
158 | 			for j := 0; j < letterCount; j++ {
159 | 				result = append(result, byte(rand.Intn(26)+'a'))
160 | 			}
161 | 		}
162 | 	}
163 | 	return result
164 | }
157 | 


--------------------------------------------------------------------------------
/tree.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter
  2 | 
  3 | /*
  4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
  5 | #include <tree_sitter/api.h>
  6 | */
  7 | import "C"
  8 | 
  9 | import (
 10 | 	"unsafe"
 11 | )
 12 | 
 13 | // Tree is a parsed syntax tree. It wraps a pointer to the C TSTree, which
 14 | // [Tree.Close] releases.
 15 | type Tree struct {
 16 | 	_inner *C.TSTree // underlying C tree handed to the ts_tree_* functions
 17 | }
 18 | 
 19 | // newTree wraps a raw C tree pointer in a [Tree] without copying it.
 20 | func newTree(inner *C.TSTree) *Tree {
 21 | 	return &Tree{_inner: inner}
 22 | }
 23 | 
 24 | // RootNode returns the root node of the syntax tree.
 25 | func (t *Tree) RootNode() *Node {
 26 | 	return &Node{_inner: C.ts_tree_root_node(t._inner)}
 27 | }
 28 | 
 29 | // RootNodeWithOffset returns the root node of the syntax tree with its
 30 | // position shifted forward by the given byte offset and row/column extent.
 31 | func (t *Tree) RootNodeWithOffset(offsetBytes int, offsetExtent Point) *Node {
 32 | 	return &Node{_inner: C.ts_tree_root_node_with_offset(t._inner, C.uint(offsetBytes), offsetExtent.toTSPoint())} // NOTE(review): offsetBytes is converted to C.uint unchecked, so a negative value wraps
 33 | }
 34 | 
 35 | // Language returns the language that was used to parse the syntax tree.
 36 | func (t *Tree) Language() *Language {
 37 | 	return &Language{Inner: C.ts_tree_language(t._inner)}
 38 | }
 39 | 
 40 | // Edit updates the syntax tree to keep it in sync with source code that
 41 | // has been edited.
 42 | //
 43 | // You must describe the edit both in terms of byte offsets and in terms of
 44 | // row/column coordinates.
 45 | func (t *Tree) Edit(edit *InputEdit) {
 46 | 	C.ts_tree_edit(t._inner, edit.toTSInputEdit())
 47 | }
 48 | 
 49 | // Walk creates a new [TreeCursor] starting from the root of the tree.
 50 | func (t *Tree) Walk() *TreeCursor {
 51 | 	return t.RootNode().Walk()
 52 | }
 53 | 
 54 | // ChangedRanges compares this old, edited syntax tree with a new syntax tree
 55 | // for the same document and returns the ranges whose syntactic structure
 56 | // has changed.
 57 | //
 58 | // The receiver must already have been edited so that its ranges line up
 59 | // with the new tree. The usual pattern is to call this right after parsing
 60 | // with a [Parser]: invoke it on the old tree that was handed to the parser
 61 | // and pass the tree that the parser returned.
 62 | //
 63 | // Each returned range marks an area where the hierarchy of syntax nodes
 64 | // (from root to leaf) differs between the two trees. Characters outside
 65 | // these ranges have identical ancestor nodes in both trees.
 66 | //
 67 | // The ranges may be slightly larger than the exact changed areas, but
 68 | // Tree-sitter attempts to make them as small as possible.
 69 | func (t *Tree) ChangedRanges(other *Tree) []Range {
 70 | 	var n C.uint
 71 | 	first := C.ts_tree_get_changed_ranges(t._inner, other._inner, &n)
 72 | 	// The C array is released here once its contents have been copied out.
 73 | 	defer go_free(unsafe.Pointer(first))
 74 | 
 75 | 	stride := unsafe.Sizeof(*first)
 76 | 	out := make([]Range, int(n))
 77 | 	for i := range out {
 78 | 		// Step to the i-th TSRange of the C array.
 79 | 		r := (*C.TSRange)(unsafe.Pointer(uintptr(unsafe.Pointer(first)) + uintptr(i)*stride))
 80 | 		out[i].StartByte, out[i].EndByte = uint(r.start_byte), uint(r.end_byte)
 81 | 		out[i].StartPoint = Point{Row: uint(r.start_point.row), Column: uint(r.start_point.column)}
 82 | 		out[i].EndPoint = Point{Row: uint(r.end_point.row), Column: uint(r.end_point.column)}
 83 | 	}
 84 | 	return out
 85 | }
 86 | 
 87 | // IncludedRanges returns the included ranges that were used to parse the
 88 | // syntax tree.
 89 | func (t *Tree) IncludedRanges() []Range {
 90 | 	var n C.uint
 91 | 	first := C.ts_tree_included_ranges(t._inner, &n)
 92 | 	defer go_free(unsafe.Pointer(first)) // the C side returns a copy that must be released here
 93 | 
 94 | 	stride := unsafe.Sizeof(*first)
 95 | 	out := make([]Range, int(n))
 96 | 	for i := range out {
 97 | 		r := (*C.TSRange)(unsafe.Pointer(uintptr(unsafe.Pointer(first)) + uintptr(i)*stride))
 98 | 		out[i].StartByte, out[i].EndByte = uint(r.start_byte), uint(r.end_byte)
 99 | 		out[i].StartPoint = Point{Row: uint(r.start_point.row), Column: uint(r.start_point.column)}
100 | 		out[i].EndPoint = Point{Row: uint(r.end_point.row), Column: uint(r.end_point.column)}
101 | 	}
102 | 	return out
103 | }
104 | 
105 | // PrintDotGraph writes a graph of the tree to the given file descriptor.
106 | // The graph is formatted in the DOT language. You may want to pipe this
107 | // graph directly to a `dot(1)` process in order to generate SVG
108 | // output.
109 | func (t *Tree) PrintDotGraph(file int) {
110 | 	C.ts_tree_print_dot_graph(t._inner, C.int(file))
111 | }
112 | 
113 | // Close releases the memory held by the underlying C tree. It is safe to
114 | // call on a nil Tree and to call more than once; later calls are no-ops.
115 | func (t *Tree) Close() {
116 | 	if t == nil || t._inner == nil {
117 | 		return
118 | 	}
119 | 	C.ts_tree_delete(t._inner)
120 | 	// Forget the freed pointer so a repeated Close cannot free it twice.
121 | 	t._inner = nil
122 | }
118 | 
119 | func (t *Tree) Clone() *Tree {
120 | 	return newTree(C.ts_tree_copy(t._inner)) // ts_tree_copy retains the root and allocates a new TSTree, so the clone needs its own Close
121 | }
122 | 


--------------------------------------------------------------------------------
/src/stack.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_PARSE_STACK_H_
  2 | #define TREE_SITTER_PARSE_STACK_H_
  3 | 
  4 | #ifdef __cplusplus
  5 | extern "C" {
  6 | #endif
  7 | 
  8 | #include "./array.h"
  9 | #include "./subtree.h"
 10 | #include <stdio.h>
 11 | 
 12 | typedef struct Stack Stack;
 13 | 
 14 | typedef unsigned StackVersion;
 15 | #define STACK_VERSION_NONE ((StackVersion)-1)
 16 | 
 17 | typedef struct {
 18 |   SubtreeArray subtrees;
 19 |   StackVersion version;
 20 | } StackSlice;
 21 | typedef Array(StackSlice) StackSliceArray;
 22 | 
 23 | typedef struct {
 24 |   Length position;
 25 |   unsigned depth;
 26 |   TSStateId state;
 27 | } StackSummaryEntry;
 28 | typedef Array(StackSummaryEntry) StackSummary;
 29 | 
 30 | // Create a stack.
 31 | Stack *ts_stack_new(SubtreePool *subtree_pool);
 32 | 
 33 | // Release the memory reserved for a given stack.
 34 | void ts_stack_delete(Stack *self);
 35 | 
 36 | // Get the stack's current number of versions.
 37 | uint32_t ts_stack_version_count(const Stack *self);
 38 | 
 39 | // Get the state at the top of the given version of the stack. If the stack is
 40 | // empty, this returns the initial state, 0.
 41 | TSStateId ts_stack_state(const Stack *self, StackVersion version);
 42 | 
 43 | // Get the last external token associated with a given version of the stack.
 44 | Subtree ts_stack_last_external_token(const Stack *self, StackVersion version);
 45 | 
 46 | // Set the last external token associated with a given version of the stack.
 47 | void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token);
 48 | 
 49 | // Get the position of the given version of the stack within the document.
 50 | Length ts_stack_position(const Stack *, StackVersion);
 51 | 
 52 | // Push a tree and state onto the given version of the stack.
 53 | //
 54 | // This transfers ownership of the tree to the Stack. Callers that
 55 | // need to retain ownership of the tree for their own purposes should
 56 | // first retain the tree.
 57 | void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state);
 58 | 
 59 | // Pop the given number of entries from the given version of the stack. This
 60 | // operation can increase the number of stack versions by revealing multiple
 61 | // versions which had previously been merged. It returns an array that
 62 | // specifies the index of each revealed version and the trees that were
 63 | // removed from that version.
 64 | StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count);
 65 | 
 66 | // Remove an error at the top of the given version of the stack.
 67 | SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version);
 68 | 
 69 | // Remove any pending trees from the top of the given version of the stack.
 70 | StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version);
 71 | 
 72 | // Remove all trees from the given version of the stack.
 73 | StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version);
 74 | 
 75 | // Get the maximum number of tree nodes reachable from this version of the stack
 76 | // since the last error was detected.
 77 | unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version);
 78 | 
 79 | int ts_stack_dynamic_precedence(Stack *self, StackVersion version);
 80 | 
 81 | bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version);
 82 | 
 83 | // Compute a summary of all the parse states near the top of the given
 84 | // version of the stack and store the summary for later retrieval.
 85 | void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth);
 86 | 
 87 | // Retrieve a summary of all the parse states near the top of the
 88 | // given version of the stack.
 89 | StackSummary *ts_stack_get_summary(Stack *self, StackVersion version);
 90 | 
 91 | // Get the total cost of all errors on the given version of the stack.
 92 | unsigned ts_stack_error_cost(const Stack *self, StackVersion version);
 93 | 
 94 | // Merge the given two stack versions if possible, returning true
 95 | // if they were successfully merged and false otherwise.
 96 | bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2);
 97 | 
 98 | // Determine whether the given two stack versions can be merged.
 99 | bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2);
100 | 
101 | Subtree ts_stack_resume(Stack *self, StackVersion version);
102 | 
103 | void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead);
104 | 
105 | void ts_stack_halt(Stack *self, StackVersion version);
106 | 
107 | bool ts_stack_is_active(const Stack *self, StackVersion version);
108 | 
109 | bool ts_stack_is_paused(const Stack *self, StackVersion version);
110 | 
111 | bool ts_stack_is_halted(const Stack *self, StackVersion version);
112 | 
113 | void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2);
114 | 
115 | void ts_stack_swap_versions(Stack *, StackVersion v1, StackVersion v2);
116 | 
117 | StackVersion ts_stack_copy_version(Stack *self, StackVersion version);
118 | 
119 | // Remove the given version from the stack.
120 | void ts_stack_remove_version(Stack *self, StackVersion version);
121 | 
122 | void ts_stack_clear(Stack *self);
123 | 
124 | bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f);
125 | 
126 | #ifdef __cplusplus
127 | }
128 | #endif
129 | 
130 | #endif  // TREE_SITTER_PARSE_STACK_H_
131 | 


--------------------------------------------------------------------------------
/src/tree.c:
--------------------------------------------------------------------------------
  1 | #include "tree_sitter/api.h"
  2 | #include "./array.h"
  3 | #include "./get_changed_ranges.h"
  4 | #include "./length.h"
  5 | #include "./subtree.h"
  6 | #include "./tree_cursor.h"
  7 | #include "./tree.h"
  8 | 
  9 | TSTree *ts_tree_new(
 10 |   Subtree root, const TSLanguage *language,
 11 |   const TSRange *included_ranges, unsigned included_range_count
 12 | ) {
 13 |   TSTree *result = ts_malloc(sizeof(TSTree));
 14 |   result->root = root;  // stored as-is; no retain happens here (ts_tree_copy retains before calling)
 15 |   result->language = ts_language_copy(language);
 16 |   result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));  // the tree keeps its own copy of the ranges
 17 |   memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
 18 |   result->included_range_count = included_range_count;
 19 |   return result;
 20 | }
 21 | 
 22 | TSTree *ts_tree_copy(const TSTree *self) {
 23 |   ts_subtree_retain(self->root);  // the copy refers to the same root subtree, so take a reference for it
 24 |   return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
 25 | }
 26 | 
 27 | void ts_tree_delete(TSTree *self) {
 28 |   if (!self) return;  // deleting a NULL tree is a no-op
 29 | 
 30 |   SubtreePool pool = ts_subtree_pool_new(0);  // temporary pool, used only while releasing the root
 31 |   ts_subtree_release(&pool, self->root);
 32 |   ts_subtree_pool_delete(&pool);
 33 |   ts_language_delete(self->language);
 34 |   ts_free(self->included_ranges);
 35 |   ts_free(self);
 36 | }
 37 | 
 38 | TSNode ts_tree_root_node(const TSTree *self) {
 39 |   return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
 40 | }
 41 | 
 42 | TSNode ts_tree_root_node_with_offset(
 43 |   const TSTree *self,
 44 |   uint32_t offset_bytes,
 45 |   TSPoint offset_extent
 46 | ) {
 47 |   Length offset = {offset_bytes, offset_extent};
 48 |   return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0);
 49 | }
 50 | 
 51 | const TSLanguage *ts_tree_language(const TSTree *self) {
 52 |   return self->language;
 53 | }
 54 | 
 55 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
 56 |   for (unsigned i = 0; i < self->included_range_count; i++) {  // first adjust every included range for the edit
 57 |     TSRange *range = &self->included_ranges[i];
 58 |     if (range->end_byte >= edit->old_end_byte) {  // range ends at or after the replaced text: shift the end
 59 |       if (range->end_byte != UINT32_MAX) {  // an end of UINT32_MAX is left untouched
 60 |         range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
 61 |         range->end_point = point_add(
 62 |           edit->new_end_point,
 63 |           point_sub(range->end_point, edit->old_end_point)
 64 |         );
 65 |         if (range->end_byte < edit->new_end_byte) {  // the unsigned addition wrapped: saturate to the maximum
 66 |           range->end_byte = UINT32_MAX;
 67 |           range->end_point = POINT_MAX;
 68 |         }
 69 |       }
 70 |     } else if (range->end_byte > edit->start_byte) {  // range ends inside the replaced text: pull the end back to the edit start
 71 |       range->end_byte = edit->start_byte;
 72 |       range->end_point = edit->start_point;
 73 |     }
 74 |     if (range->start_byte >= edit->old_end_byte) {  // range starts at or after the replaced text: shift the start
 75 |       range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
 76 |       range->start_point = point_add(
 77 |         edit->new_end_point,
 78 |         point_sub(range->start_point, edit->old_end_point)
 79 |       );
 80 |       if (range->start_byte < edit->new_end_byte) {  // the unsigned addition wrapped: saturate to the maximum
 81 |         range->start_byte = UINT32_MAX;
 82 |         range->start_point = POINT_MAX;
 83 |       }
 84 |     } else if (range->start_byte > edit->start_byte) {  // range starts inside the replaced text: move the start to the edit start
 85 |       range->start_byte = edit->start_byte;
 86 |       range->start_point = edit->start_point;
 87 |     }
 88 |   }
 89 | 
 90 |   SubtreePool pool = ts_subtree_pool_new(0);  // temporary pool, used only for this edit
 91 |   self->root = ts_subtree_edit(self->root, edit, &pool);
 92 |   ts_subtree_pool_delete(&pool);
 93 | }
 94 | 
 95 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
 96 |   *length = self->included_range_count;
 97 |   TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));  // fresh copy; this function does not free it
 98 |   memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
 99 |   return ranges;
100 | }
101 | 
102 | TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
103 |   TreeCursor cursor1 = {NULL, array_new(), 0};
104 |   TreeCursor cursor2 = {NULL, array_new(), 0};
105 |   ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree));  // one cursor per tree, each starting at its root
106 |   ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree));
107 | 
108 |   TSRangeArray included_range_differences = array_new();  // differences between the two trees' included ranges
109 |   ts_range_array_get_changed_ranges(
110 |     old_tree->included_ranges, old_tree->included_range_count,
111 |     new_tree->included_ranges, new_tree->included_range_count,
112 |     &included_range_differences
113 |   );
114 | 
115 |   TSRange *result;  // filled in through the last argument of ts_subtree_get_changed_ranges
116 |   *length = ts_subtree_get_changed_ranges(
117 |     &old_tree->root, &new_tree->root, &cursor1, &cursor2,
118 |     old_tree->language, &included_range_differences, &result
119 |   );
120 | 
121 |   array_delete(&included_range_differences);  // temporaries are released here; `result` is returned without being freed
122 |   array_delete(&cursor1.stack);
123 |   array_delete(&cursor2.stack);
124 |   return result;
125 | }
126 | 
127 | #ifdef _WIN32
128 | 
129 | #include <io.h>
130 | #include <windows.h>
131 | 
132 | int _ts_dup(HANDLE handle) {
133 |   HANDLE dup_handle;
134 |   if (!DuplicateHandle(
135 |     GetCurrentProcess(), handle,
136 |     GetCurrentProcess(), &dup_handle,
137 |     0, FALSE, DUPLICATE_SAME_ACCESS
138 |   )) return -1;
139 | 
140 |   return _open_osfhandle((intptr_t)dup_handle, 0);
141 | }
142 | 
143 | void ts_tree_print_dot_graph(const TSTree *self, int fd) {
144 |   FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a");
145 |   ts_subtree_print_dot_graph(self->root, self->language, file);
146 |   fclose(file);
147 | }
148 | 
149 | #elif !defined(__wasi__) // WASI doesn't support dup
150 | 
151 | #include <unistd.h>
152 | 
// Thin wrapper over `dup`: returns whatever `dup(file_descriptor)` returns.
int _ts_dup(int file_descriptor) {
  int duplicate = dup(file_descriptor);
  return duplicate;
}
156 | 
157 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
158 |   FILE *file = fdopen(_ts_dup(file_descriptor), "a");
159 |   ts_subtree_print_dot_graph(self->root, self->language, file);
160 |   fclose(file);
161 | }
162 | 
163 | #else
164 | 
165 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
166 |   (void)self;
167 |   (void)file_descriptor;
168 | }
169 | 
170 | #endif
171 | 


--------------------------------------------------------------------------------
/language.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter
  2 | 
/*
#cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
#include <stdlib.h>
#include <tree_sitter/api.h>
*/
import "C"
  8 | 
  9 | import (
 10 | 	"fmt"
 11 | 	"unsafe"
 12 | )
 13 | 
// LANGUAGE_VERSION mirrors the C macro TREE_SITTER_LANGUAGE_VERSION. It is the
// maximum version reported by [LanguageError.Error].
const LANGUAGE_VERSION = C.TREE_SITTER_LANGUAGE_VERSION

// MIN_COMPATIBLE_LANGUAGE_VERSION mirrors the C macro
// TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION. It is the minimum version
// reported by [LanguageError.Error].
const MIN_COMPATIBLE_LANGUAGE_VERSION = C.TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
 17 | 
// An opaque object that defines how to parse a particular language. The code
// for each [Language] is generated by the Tree-sitter CLI.
type Language struct {
	// Inner is the underlying C language pointer passed to every C API call
	// made by the methods of this type.
	Inner *C.TSLanguage
}
 23 | 
// An error that occurred when trying to assign an incompatible [Language] to
// a parser.
type LanguageError struct {
	// version is the incompatible language version reported by Error.
	version uint32
}
 29 | 
// The metadata associated with a language.
//
// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)
// of the language. This version information should be used to signal if a given parser might
// be incompatible with existing queries when upgrading between major versions, or minor versions
// if it's in zerover.
//
// The fields are copied from the C metadata struct by [Language.Metadata].
type LanguageMetadata struct {
	MajorVersion uint8
	MinorVersion uint8
	PatchVersion uint8
}
 41 | 
 42 | func NewLanguage(ptr unsafe.Pointer) *Language {
 43 | 	return &Language{Inner: (*C.TSLanguage)(ptr)}
 44 | }
 45 | 
 46 | // Deprecated: Use [Language.AbiVersion] instead.
 47 | //
 48 | // Get the ABI version number that indicates which version of the
 49 | // Tree-sitter CLI that was used to generate this [Language].
 50 | func (l *Language) Version() uint32 {
 51 | 	return uint32(C.ts_language_version(l.Inner))
 52 | }
 53 | 
 54 | // Get the ABI version number that indicates which version of the
 55 | // Tree-sitter CLI that was used to generate this [Language].
 56 | func (l *Language) AbiVersion() uint32 {
 57 | 	return uint32(C.ts_language_abi_version(l.Inner))
 58 | }
 59 | 
 60 | // Get the metadata for this language. This information is generated by the
 61 | // CLI, and relies on the language author providing the correct metadata in
 62 | // the language's `tree-sitter.json` file.
 63 | func (l *Language) Metadata() *LanguageMetadata {
 64 | 	ptr := C.ts_language_metadata(l.Inner)
 65 | 	if ptr == nil {
 66 | 		return nil
 67 | 	}
 68 | 	return &LanguageMetadata{
 69 | 		MajorVersion: uint8(ptr.major_version),
 70 | 		MinorVersion: uint8(ptr.minor_version),
 71 | 		PatchVersion: uint8(ptr.patch_version),
 72 | 	}
 73 | }
 74 | 
 75 | // Get the number of distinct node types in this language.
 76 | func (l *Language) NodeKindCount() uint32 {
 77 | 	return uint32(C.ts_language_symbol_count(l.Inner))
 78 | }
 79 | 
 80 | // Get the number of valid states in this language.
 81 | func (l *Language) ParseStateCount() uint32 {
 82 | 	return uint32(C.ts_language_state_count(l.Inner))
 83 | }
 84 | 
 85 | // Get the name of the node kind for the given numerical id.
 86 | func (l *Language) NodeKindForId(id uint16) string {
 87 | 	return C.GoString(C.ts_language_symbol_name(l.Inner, C.TSSymbol(id)))
 88 | }
 89 | 
 90 | // Get the numeric id for the given node kind.
 91 | func (l *Language) IdForNodeKind(kind string, named bool) uint16 {
 92 | 	return uint16(C.ts_language_symbol_for_name(l.Inner, C.CString(kind), C.uint32_t(len(kind)), C.bool(named)))
 93 | }
 94 | 
 95 | // Check if the node type for the given numerical id is named (as opposed
 96 | // to an anonymous node type).
 97 | func (l *Language) NodeKindIsNamed(id uint16) bool {
 98 | 	return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) == C.TSSymbolTypeRegular
 99 | }
100 | 
101 | // Check if the node type for the given numerical id is visible (as opposed
102 | // to a hidden node type).
103 | func (l *Language) NodeKindIsVisible(id uint16) bool {
104 | 	return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) <= C.TSSymbolTypeAnonymous
105 | }
106 | 
107 | // Check if the node type for the given numerical id is a supertype.
108 | func (l *Language) NodeKindIsSupertype(id uint16) bool {
109 | 	return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) == C.TSSymbolTypeSupertype
110 | }
111 | 
112 | // Get the number of distinct field names in this language.
113 | func (l *Language) FieldCount() uint32 {
114 | 	return uint32(C.ts_language_field_count(l.Inner))
115 | }
116 | 
117 | // Get the field names for the given numerical id.
118 | func (l *Language) FieldNameForId(id uint16) string {
119 | 	return C.GoString(C.ts_language_field_name_for_id(l.Inner, C.TSFieldId(id)))
120 | }
121 | 
122 | // Get the numerical id for the given field name.
123 | func (l *Language) FieldIdForName(name string) uint16 {
124 | 	return uint16(C.ts_language_field_id_for_name(l.Inner, C.CString(name), C.uint32_t(len(name))))
125 | }
126 | 
127 | // Get the next parse state. Combine this with
128 | // [Language.LookaheadIterator] to
129 | // generate completion suggestions or valid symbols in error nodes.
130 | func (l *Language) NextState(state uint16, id uint16) uint16 {
131 | 	return uint16(C.ts_language_next_state(l.Inner, C.TSStateId(state), C.TSSymbol(id)))
132 | }
133 | 
134 | // Create a new lookahead iterator for this language and parse state.
135 | //
136 | // This returns `nil` if state is invalid for this language.
137 | //
138 | // Iterating [LookaheadIterator] will yield valid symbols in the given
139 | // parse state. Newly created lookahead iterators will return the `ERROR`
140 | // symbol from [LookaheadIterator.Symbol].
141 | //
142 | // Lookahead iterators can be useful to generate suggestions and improve
143 | // syntax error diagnostics. To get symbols valid in an ERROR node, use the
144 | // lookahead iterator on its first leaf node state. For `MISSING` nodes, a
145 | // lookahead iterator created on the previous non-extra leaf node may be
146 | // appropriate.
147 | func (l *Language) LookaheadIterator(state uint16) *LookaheadIterator {
148 | 	ptr := C.ts_lookahead_iterator_new(l.Inner, C.TSStateId(state))
149 | 	if ptr == nil {
150 | 		return nil
151 | 	}
152 | 	return newLookaheadIterator(ptr)
153 | }
154 | 
155 | func (l *LanguageError) Error() string {
156 | 	return fmt.Sprintf("Incompatible language version %d. Expected minimum %d, maximum %d", l.version, C.TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, C.TREE_SITTER_LANGUAGE_VERSION)
157 | }
158 | 


--------------------------------------------------------------------------------
/tree_cursor.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter
  2 | 
  3 | /*
  4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
  5 | #include <tree_sitter/api.h>
  6 | */
  7 | import "C"
  8 | 
// A stateful object for walking a syntax [Tree] efficiently.
//
// The wrapped C cursor is deleted by [TreeCursor.Close].
type TreeCursor struct {
	// _inner is the underlying C cursor value; methods pass its address to
	// the C API.
	_inner C.TSTreeCursor
}
 13 | 
 14 | func newTreeCursor(node Node) *TreeCursor {
 15 | 	return &TreeCursor{_inner: C.ts_tree_cursor_new(node._inner)}
 16 | }
 17 | 
 18 | func (tc *TreeCursor) Close() {
 19 | 	C.ts_tree_cursor_delete(&tc._inner)
 20 | }
 21 | 
 22 | func (tc *TreeCursor) Copy() *TreeCursor {
 23 | 	return &TreeCursor{_inner: C.ts_tree_cursor_copy(&tc._inner)}
 24 | }
 25 | 
 26 | // Get the tree cursor's current [Node].
 27 | func (tc *TreeCursor) Node() *Node {
 28 | 	return newNode(C.ts_tree_cursor_current_node(&tc._inner))
 29 | }
 30 | 
 31 | // Get the numerical field id of this tree cursor's current node.
 32 | //
 33 | // See also [TreeCursor.FieldName].
 34 | func (tc *TreeCursor) FieldId() uint16 {
 35 | 	return uint16(C.ts_tree_cursor_current_field_id(&tc._inner))
 36 | }
 37 | 
 38 | // Get the field name of this tree cursor's current node.
 39 | func (tc *TreeCursor) FieldName() string {
 40 | 	return C.GoString(C.ts_tree_cursor_current_field_name(&tc._inner))
 41 | }
 42 | 
 43 | // Get the depth of the cursor's current node relative to the original
 44 | // node that the cursor was constructed with.
 45 | func (tc *TreeCursor) Depth() uint32 {
 46 | 	return uint32(C.ts_tree_cursor_current_depth(&tc._inner))
 47 | }
 48 | 
 49 | // Get the index of the cursor's current node out of all of the
 50 | // descendants of the original node that the cursor was constructed with.
 51 | func (tc *TreeCursor) DescendantIndex() uint32 {
 52 | 	return uint32(C.ts_tree_cursor_current_descendant_index(&tc._inner))
 53 | }
 54 | 
 55 | // Move this cursor to the first child of its current node.
 56 | //
 57 | // This returns `true` if the cursor successfully moved, and returns
 58 | // `false` if there were no children.
 59 | func (tc *TreeCursor) GotoFirstChild() bool {
 60 | 	return bool(C.ts_tree_cursor_goto_first_child(&tc._inner))
 61 | }
 62 | 
 63 | // Move this cursor to the last child of its current node.
 64 | //
 65 | // This returns `true` if the cursor successfully moved, and returns
 66 | // `false` if there were no children.
 67 | //
 68 | // Note that this function may be slower than
 69 | // [TreeCursor.GotoFirstChild] because it needs to
 70 | // iterate through all the children to compute the child's position.
 71 | func (tc *TreeCursor) GotoLastChild() bool {
 72 | 	return bool(C.ts_tree_cursor_goto_last_child(&tc._inner))
 73 | }
 74 | 
 75 | // Move this cursor to the parent of its current node.
 76 | //
 77 | // This returns `true` if the cursor successfully moved, and returns
 78 | // `false` if there was no parent node (the cursor was already on the
 79 | // root node).
 80 | //
 81 | // Note that the given node is considered the root of the cursor,
 82 | // and the cursor cannot walk outside this node.
 83 | func (tc *TreeCursor) GotoParent() bool {
 84 | 	return bool(C.ts_tree_cursor_goto_parent(&tc._inner))
 85 | }
 86 | 
 87 | // Move this cursor to the next sibling of its current node.
 88 | //
 89 | // This returns `true` if the cursor successfully moved, and returns
 90 | // `false` if there was no next sibling node.
 91 | //
 92 | // Note that the given node is considered the root of the cursor,
 93 | // and the cursor cannot walk outside this node.
 94 | func (tc *TreeCursor) GotoNextSibling() bool {
 95 | 	return bool(C.ts_tree_cursor_goto_next_sibling(&tc._inner))
 96 | }
 97 | 
 98 | // Move the cursor to the node that is the nth descendant of
 99 | // the original node that the cursor was constructed with, where
100 | // zero represents the original node itself.
101 | func (tc *TreeCursor) GotoDescendant(descendantIndex uint32) {
102 | 	C.ts_tree_cursor_goto_descendant(&tc._inner, C.uint32_t(descendantIndex))
103 | }
104 | 
105 | // Move this cursor to the previous sibling of its current node.
106 | //
107 | // This returns `true` if the cursor successfully moved, and returns
108 | // `false` if there was no previous sibling node.
109 | //
110 | // Note, that this function may be slower than
111 | // [TreeCursor.GotoNextSibling] due to how node
112 | // positions are stored. In the worst case, this will need to iterate
113 | // through all the children upto the previous sibling node to recalculate
114 | // its position. Also note that the node the cursor was constructed with
115 | // is considered the root of the cursor, and the cursor cannot
116 | // walk outside this node.
117 | func (tc *TreeCursor) GotoPreviousSibling() bool {
118 | 	return bool(C.ts_tree_cursor_goto_previous_sibling(&tc._inner))
119 | }
120 | 
121 | // Move this cursor to the first child of its current node that extends
122 | // beyond the given byte offset.
123 | //
124 | // This returns the index of the child node if one was found, and returns
125 | // `nil` if no such child was found.
126 | func (tc *TreeCursor) GotoFirstChildForByte(byteIndex uint32) *uint {
127 | 	res := C.ts_tree_cursor_goto_first_child_for_byte(&tc._inner, C.uint32_t(byteIndex))
128 | 	if res < 0 {
129 | 		return nil
130 | 	}
131 | 	index := uint(res)
132 | 	return &index
133 | }
134 | 
135 | // Move this cursor to the first child of its current node that extends
136 | // beyond the given byte offset.
137 | //
138 | // This returns the index of the child node if one was found, and returns
139 | // `nil` if no such child was found.
140 | func (tc *TreeCursor) GotoFirstChildForPoint(point Point) *uint {
141 | 	res := C.ts_tree_cursor_goto_first_child_for_point(&tc._inner, point.toTSPoint())
142 | 	if res < 0 {
143 | 		return nil
144 | 	}
145 | 	index := uint(res)
146 | 	return &index
147 | }
148 | 
149 | // Re-initialize this tree cursor to start at the original node that the
150 | // cursor was constructed with.
151 | func (tc *TreeCursor) Reset(node Node) {
152 | 	C.ts_tree_cursor_reset(&tc._inner, node._inner)
153 | }
154 | 
155 | // Re-initialize a tree cursor to the same position as another cursor.
156 | //
157 | // Unlike [TreeCursor.Reset], this will not lose parent
158 | // information and allows reusing already created cursors.
159 | func (tc *TreeCursor) ResetTo(cursor *TreeCursor) {
160 | 	C.ts_tree_cursor_reset_to(&tc._inner, &cursor._inner)
161 | }
162 | 


--------------------------------------------------------------------------------
/src/portable/endian.h:
--------------------------------------------------------------------------------
  1 | // "License": Public Domain
  2 | // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
  3 | // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
  4 | // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
  5 | // an example on how to get the endian conversion functions on different platforms.
  6 | 
  7 | // updates from https://github.com/mikepb/endian.h/issues/4
  8 | 
  9 | #ifndef ENDIAN_H
 10 | #define ENDIAN_H
 11 | 
// Collapse the various Windows compiler macros into a single __WINDOWS__
// flag, which the platform dispatch below tests.
#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)

#    define __WINDOWS__

#endif
 17 | 
// Platform dispatch. The first two branches cover systems that already ship
// a suitable header: <endian.h> or <sys/endian.h>. Either can also be forced
// with HAVE_ENDIAN_H / HAVE_SYS_ENDIAN_H.
#if defined(HAVE_ENDIAN_H) || \
    defined(__linux__) || \
    defined(__GNU__) || \
    defined(__OpenBSD__) || \
    defined(__CYGWIN__) || \
    defined(__MSYS__) || \
    defined(__EMSCRIPTEN__)

# include <endian.h>

#elif defined(HAVE_SYS_ENDIAN_H) || \
    defined(__FreeBSD__) || \
    defined(__NetBSD__) || \
    defined(__DragonFly__)

# include <sys/endian.h>
 34 | 
// Apple platforms: alias the double-underscore byte-order names to the
// unprefixed ones, then define the conversion macros.
#elif defined(__APPLE__)
#    define __BYTE_ORDER    BYTE_ORDER
#    define __BIG_ENDIAN    BIG_ENDIAN
#    define __LITTLE_ENDIAN LITTLE_ENDIAN
#    define __PDP_ENDIAN    PDP_ENDIAN

// Without _POSIX_C_SOURCE, use the OSByteOrder.h swap helpers.
#    if !defined(_POSIX_C_SOURCE)
#        include <libkern/OSByteOrder.h>

#        define htobe16(x) OSSwapHostToBigInt16(x)
#        define htole16(x) OSSwapHostToLittleInt16(x)
#        define be16toh(x) OSSwapBigToHostInt16(x)
#        define le16toh(x) OSSwapLittleToHostInt16(x)

#        define htobe32(x) OSSwapHostToBigInt32(x)
#        define htole32(x) OSSwapHostToLittleInt32(x)
#        define be32toh(x) OSSwapBigToHostInt32(x)
#        define le32toh(x) OSSwapLittleToHostInt32(x)

#        define htobe64(x) OSSwapHostToBigInt64(x)
#        define htole64(x) OSSwapHostToLittleInt64(x)
#        define be64toh(x) OSSwapBigToHostInt64(x)
#        define le64toh(x) OSSwapLittleToHostInt64(x)
// With _POSIX_C_SOURCE defined, use compiler byte-swap builtins instead,
// selected by BYTE_ORDER: conversions to/from the host's own order are
// identity macros.
#    else
#        if BYTE_ORDER == LITTLE_ENDIAN
#            define htobe16(x) __builtin_bswap16(x)
#            define htole16(x) (x)
#            define be16toh(x) __builtin_bswap16(x)
#            define le16toh(x) (x)

#            define htobe32(x) __builtin_bswap32(x)
#            define htole32(x) (x)
#            define be32toh(x) __builtin_bswap32(x)
#            define le32toh(x) (x)

#            define htobe64(x) __builtin_bswap64(x)
#            define htole64(x) (x)
#            define be64toh(x) __builtin_bswap64(x)
#            define le64toh(x) (x)
#        elif BYTE_ORDER == BIG_ENDIAN
#            define htobe16(x) (x)
#            define htole16(x) __builtin_bswap16(x)
#            define be16toh(x) (x)
#            define le16toh(x) __builtin_bswap16(x)

#            define htobe32(x) (x)
#            define htole32(x) __builtin_bswap32(x)
#            define be32toh(x) (x)
#            define le32toh(x) __builtin_bswap32(x)

#            define htobe64(x) (x)
#            define htole64(x) __builtin_bswap64(x)
#            define be64toh(x) (x)
#            define le64toh(x) __builtin_bswap64(x)
#        else
#            error byte order not supported
#        endif
#    endif
 93 | 
// Windows: pick a byte-swap primitive, make sure BIG_ENDIAN / LITTLE_ENDIAN /
// BYTE_ORDER are defined, then define the conversion macros from them.
#elif defined(__WINDOWS__)

// MSVC (but not clang in MSVC mode) uses the _byteswap_* intrinsics; every
// other compiler uses the __builtin_bswap* builtins.
#    if defined(_MSC_VER) && !defined(__clang__)
#        include <stdlib.h>
#        define B_SWAP_16(x) _byteswap_ushort(x)
#        define B_SWAP_32(x) _byteswap_ulong(x)
#        define B_SWAP_64(x) _byteswap_uint64(x)
#    else
#        define B_SWAP_16(x) __builtin_bswap16(x)
#        define B_SWAP_32(x) __builtin_bswap32(x)
#        define B_SWAP_64(x) __builtin_bswap64(x)
#    endif

# if defined(__MINGW32__) || defined(HAVE_SYS_PARAM_H)
#   include <sys/param.h>
# endif

// Fall back through the double-underscore and compiler-predefined names, and
// finally to literal values, for any constant that is still undefined.
#    ifndef BIG_ENDIAN
#        ifdef __BIG_ENDIAN
#            define BIG_ENDIAN __BIG_ENDIAN
#        elif defined(__ORDER_BIG_ENDIAN__)
#            define BIG_ENDIAN __ORDER_BIG_ENDIAN__
#        else
#            define BIG_ENDIAN 4321
#        endif
#    endif

#    ifndef LITTLE_ENDIAN
#        ifdef __LITTLE_ENDIAN
#            define LITTLE_ENDIAN __LITTLE_ENDIAN
#        elif defined(__ORDER_LITTLE_ENDIAN__)
#            define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
#        else
#            define LITTLE_ENDIAN 1234
#        endif
#    endif

#    ifndef BYTE_ORDER
#        ifdef __BYTE_ORDER
#            define BYTE_ORDER __BYTE_ORDER
#        elif defined(__BYTE_ORDER__)
#            define BYTE_ORDER __BYTE_ORDER__
#        else
             /* assume LE on Windows if nothing was defined */
#            define BYTE_ORDER LITTLE_ENDIAN
#        endif
#    endif

// Conversions to/from the host's own byte order are identity macros; the
// others swap bytes.
#    if BYTE_ORDER == LITTLE_ENDIAN

#        define htobe16(x) B_SWAP_16(x)
#        define htole16(x) (x)
#        define be16toh(x) B_SWAP_16(x)
#        define le16toh(x) (x)

#        define htobe32(x) B_SWAP_32(x)
#        define htole32(x) (x)
#        define be32toh(x) B_SWAP_32(x)
#        define le32toh(x) (x)

#        define htobe64(x) B_SWAP_64(x)
#        define htole64(x) (x)
#        define be64toh(x) B_SWAP_64(x)
#        define le64toh(x) (x)

#    elif BYTE_ORDER == BIG_ENDIAN

#        define htobe16(x) (x)
#        define htole16(x) B_SWAP_16(x)
#        define be16toh(x) (x)
#        define le16toh(x) B_SWAP_16(x)

#        define htobe32(x) (x)
#        define htole32(x) B_SWAP_32(x)
#        define be32toh(x) (x)
#        define le32toh(x) B_SWAP_32(x)

#        define htobe64(x) (x)
#        define htole64(x) B_SWAP_64(x)
#        define be64toh(x) (x)
#        define le64toh(x) B_SWAP_64(x)

#    else

#        error byte order not supported

#    endif
181 | 
// QNX Neutrino: build the macros from the ENDIAN_* helpers.
// NOTE(review): those helpers presumably come from <gulliver.h> — confirm.
#elif defined(__QNXNTO__)

#    include <gulliver.h>

#    define __LITTLE_ENDIAN 1234
#    define __BIG_ENDIAN    4321
#    define __PDP_ENDIAN    3412

// The host-to-X macros depend on the host byte order signalled by
// __BIGENDIAN__ / __LITTLEENDIAN__.
#    if defined(__BIGENDIAN__)

#        define __BYTE_ORDER __BIG_ENDIAN

#        define htobe16(x) (x)
#        define htobe32(x) (x)
#        define htobe64(x) (x)

#        define htole16(x) ENDIAN_SWAP16(x)
#        define htole32(x) ENDIAN_SWAP32(x)
#        define htole64(x) ENDIAN_SWAP64(x)

#    elif defined(__LITTLEENDIAN__)

#        define __BYTE_ORDER __LITTLE_ENDIAN

#        define htole16(x) (x)
#        define htole32(x) (x)
#        define htole64(x) (x)

#        define htobe16(x) ENDIAN_SWAP16(x)
#        define htobe32(x) ENDIAN_SWAP32(x)
#        define htobe64(x) ENDIAN_SWAP64(x)

#    else

#        error byte order not supported

#    endif

// The X-to-host macros are defined the same way for both host byte orders.
#    define be16toh(x) ENDIAN_BE16(x)
#    define be32toh(x) ENDIAN_BE32(x)
#    define be64toh(x) ENDIAN_BE64(x)
#    define le16toh(x) ENDIAN_LE16(x)
#    define le32toh(x) ENDIAN_LE32(x)
#    define le64toh(x) ENDIAN_LE64(x)

// No branch matched: fail the build rather than guess.
#else

#    error platform not supported

#endif
232 | 
233 | #endif
234 | 


--------------------------------------------------------------------------------
/src/parser.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_PARSER_H_
  2 | #define TREE_SITTER_PARSER_H_
  3 | 
  4 | #ifdef __cplusplus
  5 | extern "C" {
  6 | #endif
  7 | 
  8 | #include <stdbool.h>
  9 | #include <stdint.h>
 10 | #include <stdlib.h>
 11 | 
// Built-in symbol ids: the error symbol is the largest TSSymbol value (-1
// cast to TSSymbol) and the end symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// NOTE(review): presumably the size of the buffer handed to the external
// scanner's serialize/deserialize hooks — confirm against the users.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
 15 | 
// Fallback declarations, compiled only when TREE_SITTER_API_H_ is not
// defined.
// NOTE(review): presumably api.h declares the same types — confirm.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata TSLanguageMetadata;
// Three-component version number of a language.
typedef struct TSLanguageMetadata {
  uint8_t major_version;
  uint8_t minor_version;
  uint8_t patch_version;
} TSLanguageMetadata;
#endif
 28 | 
// One entry of a language's field map (see `field_map_entries` in
// struct TSLanguage): a field id paired with a child index.
typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;
 34 | 
// Used to index the field and supertype maps: a start index plus a length,
// i.e. a contiguous run of map entries.
typedef struct {
  uint16_t index;
  uint16_t length;
} TSMapSlice;
 40 | 
// Per-symbol flags (see `symbol_metadata` in struct TSLanguage).
typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;
 46 | 
typedef struct TSLexer TSLexer;

// The lexer interface used by the lexing macros below and passed to
// `lex_fn`, `keyword_lex_fn` and the external scanner's `scan` hook.
//
// - `lookahead` is copied into the local `lookahead` by START_LEXER.
// - `advance` is called by START_LEXER's `next_state` step with the current
//   `skip` flag as its second argument.
// - `result_symbol` and `mark_end` are set/called by ACCEPT_TOKEN.
struct TSLexer {
  int32_t lookahead;
  TSSymbol result_symbol;
  void (*advance)(TSLexer *, bool);
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);
};
 59 | 
// Discriminator stored in the `type` member of TSParseAction by the
// SHIFT*/REDUCE/RECOVER/ACCEPT_INPUT macros.
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;
 66 | 
// A single parse action. Every variant begins with a `uint8_t type` member,
// so the bare `type` member overlays the tag of the `shift` and `reduce`
// variants.
typedef union {
  // Shift variant, filled in by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA.
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  // Reduce variant, filled in by REDUCE.
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  // Tag only; the sole member set by RECOVER and ACCEPT_INPUT.
  uint8_t type;
} TSParseAction;
 83 | 
// Lex state pair: a lex state plus an external-scanner lex state.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

// Same as TSLexMode with an additional reserved-word set id. This is the
// element type of `lex_modes` in struct TSLanguage.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
  uint16_t reserved_word_set_id;
} TSLexerMode;
 94 | 
// Element of the `parse_actions` table: either a parse action, or an
// `entry` header holding a count and a reusable flag.
// NOTE(review): presumably a header is followed by `count` actions — confirm
// against the table reader.
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;
102 | 
// A range of character codes. `set_contains` treats both `start` and `end`
// as inclusive.
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;
107 | 
// The full description of a language: size counters, parse tables, name
// tables, lexer entry points and external-scanner hooks.
// NOTE(review): presumably instances are emitted by the generator and the
// field order is part of the ABI — do not reorder without confirming.
struct TSLanguage {
  uint32_t abi_version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexerMode *lex_modes;
  // Lexer entry points; both take the lexer and a state id.
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // Hooks and tables for the external scanner. The `void *` argument of each
  // hook has the same type as the value returned by `create`.
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
  const char *name;
  const TSSymbol *reserved_words;
  uint16_t max_reserved_word_set_size;
  uint32_t supertype_count;
  const TSSymbol *supertype_symbols;
  const TSMapSlice *supertype_map_slices;
  const TSSymbol *supertype_map_entries;
  TSLanguageMetadata metadata;
};
154 | 
155 | static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
156 |   uint32_t index = 0;
157 |   uint32_t size = len - index;
158 |   while (size > 1) {
159 |     uint32_t half_size = size / 2;
160 |     uint32_t mid_index = index + half_size;
161 |     const TSCharacterRange *range = &ranges[mid_index];
162 |     if (lookahead >= range->start && lookahead <= range->end) {
163 |       return true;
164 |     } else if (lookahead > range->end) {
165 |       index = mid_index;
166 |     }
167 |     size -= half_size;
168 |   }
169 |   const TSCharacterRange *range = &ranges[index];
170 |   return (lookahead >= range->start && lookahead <= range->end);
171 | }
172 | 
173 | /*
174 |  *  Lexer Macros
175 |  */
176 | 
// UNUSED marks the declaration that follows it as intentionally unused:
// a warning-suppression pragma on MSVC, an attribute elsewhere.
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

// Prologue of a lexing function. Declares `result`, `skip`, `eof` and
// `lookahead`, and defines two labels: `next_state` advances the lexer
// (passing `skip`) and falls through to `start`, which clears `skip` and
// reloads `lookahead` from the lexer. Expects a `lexer` variable in scope.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

// Sets `state` and jumps to `next_state`, consuming the current character.
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Takes a flat list of (character, state) pairs. If `lookahead` equals one of
// the characters, sets `state` to the paired value and jumps to `next_state`;
// otherwise falls through.
#define ADVANCE_MAP(...)                                              \
  {                                                                   \
    static const uint16_t map[] = { __VA_ARGS__ };                    \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
      if (map[i] == lookahead) {                                      \
        state = map[i + 1];                                           \
        goto next_state;                                              \
      }                                                               \
    }                                                                 \
  }

// Like ADVANCE, but sets `skip` first so the lexer's `advance` callback is
// invoked with `skip == true`.
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

// Records a successful match: sets `result`, stores the symbol in the lexer
// and calls `mark_end`.
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

// Returns the accumulated `result` from the lexing function.
#define END_STATE() return result;
226 | 
227 | /*
228 |  *  Parse Table Macros
229 |  */
230 | 
// Converts a state id into an offset relative to LARGE_STATE_COUNT.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

// Identity macros; they expand to their argument unchanged.
#define STATE(id) id

#define ACTIONS(id) id

// Each macro below expands to a doubly-braced initializer whose inner braces
// initialize a TSParseAction. Members that are not named are left
// zero-initialized by the designated initializer.

// Shift to `state_value`.
#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value)          \
    }                                 \
  }}

// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value),         \
      .repetition = true              \
    }                                 \
  }}

// Shift with the `extra` flag set and no target state.
#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

// Reduce `children` children to `symbol_name`, recording the dynamic
// precedence and production id.
#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{                                                       \
    .reduce = {                                            \
      .type = TSParseActionTypeReduce,                     \
      .symbol = symbol_name,                               \
      .child_count = children,                             \
      .dynamic_precedence = precedence,                    \
      .production_id = prod_id                             \
    },                                                     \
  }}

// Action carrying only the Recover tag.
#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

// Action carrying only the Accept tag.
#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}
282 | 
283 | #ifdef __cplusplus
284 | }
285 | #endif
286 | 
287 | #endif  // TREE_SITTER_PARSER_H_
288 | 


--------------------------------------------------------------------------------
/src/language.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_LANGUAGE_H_
  2 | #define TREE_SITTER_LANGUAGE_H_
  3 | 
  4 | #ifdef __cplusplus
  5 | extern "C" {
  6 | #endif
  7 | 
  8 | #include "./subtree.h"
  9 | #include "./parser.h"
 10 | 
 11 | #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)  // the id directly below ts_builtin_sym_error
 12 | 
 13 | #define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15  // ABI version that gates the supertype, metadata and name accessors in language.c
 14 | #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14  // ABI version from which `primary_state_ids` is consulted
 15 | 
 16 | typedef struct {  // Parse actions found for one (state, symbol) pair; filled in by ts_language_table_entry.
 17 |   const TSParseAction *actions;  // first action, or NULL for the built-in error symbols
 18 |   uint32_t action_count;         // number of actions starting at `actions`
 19 |   bool is_reusable;              // copied from the table entry's `reusable` flag
 20 | } TableEntry;
 21 | 
 22 | typedef struct {  // Cursor over the symbols that have parse-table entries in one state.
 23 |   const TSLanguage *language;
 24 |   const uint16_t *data;       // current position in the large or small parse table
 25 |   const uint16_t *group_end;  // small states only: one past the last symbol of the current group
 26 |   TSStateId state;
 27 |   uint16_t table_value;       // table value belonging to the current symbol
 28 |   uint16_t section_index;
 29 |   uint16_t group_count;       // small states only: groups that have not been entered yet
 30 |   bool is_small_state;
 31 | 
 32 |   const TSParseAction *actions;  // set for terminals: actions of the current symbol
 33 |   TSSymbol symbol;               // current symbol; UINT16_MAX before the first step
 34 |   TSStateId next_state;          // successor state for non-terminals; 0 for terminals
 35 |   uint16_t action_count;         // length of `actions` for terminals; 0 for non-terminals
 36 | } LookaheadIterator;
 37 | 
 38 | void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result);
 39 | TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state);
 40 | bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol);
 41 | TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol);
 42 | TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol);
 43 | 
 44 | // Fetch the parse actions for `symbol` in `state`; the number of actions is
 45 | // written to `*count` and a pointer to the first one is returned.
 46 | static inline const TSParseAction *ts_language_actions(
 47 |   const TSLanguage *self, TSStateId state, TSSymbol symbol, uint32_t *count
 48 | ) {
 49 |   TableEntry found;
 50 |   ts_language_table_entry(self, state, symbol, &found);
 51 |   const TSParseAction *first_action = found.actions;
 52 |   *count = found.action_count;
 53 |   return first_action;
 54 | }
 55 | 
 56 | // True when the first action listed for (state, symbol) is a reduction.
 57 | static inline bool ts_language_has_reduce_action(
 58 |   const TSLanguage *self, TSStateId state, TSSymbol symbol
 59 | ) {
 60 |   TableEntry found;
 61 |   ts_language_table_entry(self, state, symbol, &found);
 62 |   if (found.action_count == 0) return false;
 63 |   return found.actions[0].type == TSParseActionTypeReduce;
 64 | }
 65 | 
 66 | // Look up the table value for a given symbol and state.
 67 | //
 68 | // For non-terminal symbols, the table value represents a successor state.
 69 | // For terminal symbols, it represents an index in the actions table.
 70 | // For 'large' parse states, this is a direct lookup. For 'small' parse
 71 | // states, this requires searching through the symbol groups to find
 72 | // the given symbol; 0 is returned when no group lists it.
 73 | static inline uint16_t ts_language_lookup(
 74 |   const TSLanguage *self,
 75 |   TSStateId state,
 76 |   TSSymbol symbol
 77 | ) {
 78 |   if (state >= self->large_state_count) {
 79 |     uint32_t index = self->small_parse_table_map[state - self->large_state_count];
 80 |     const uint16_t *data = &self->small_parse_table[index];
 81 |     uint16_t group_count = *(data++);  // first word of the state's record
 82 |     for (unsigned i = 0; i < group_count; i++) {
 83 |       uint16_t section_value = *(data++);  // value shared by every symbol of this group
 84 |       uint16_t symbol_count = *(data++);   // number of symbol ids that follow
 85 |       for (unsigned j = 0; j < symbol_count; j++) {
 86 |         if (*(data++) == symbol) return section_value;
 87 |       }
 88 |     }
 89 |     return 0;
 90 |   } else {
 91 |     return self->parse_table[state * self->symbol_count + symbol];  // one row of symbol_count values per state
 92 |   }
 93 | }
 94 | 
 95 | // Reports whether the parse table holds a non-zero value for (state, symbol).
 96 | static inline bool ts_language_has_actions(
 97 |   const TSLanguage *self, TSStateId state, TSSymbol symbol
 98 | ) {
 99 |   uint16_t table_value = ts_language_lookup(self, state, symbol);
100 |   return table_value != 0;
101 | }
102 | 
103 | // Iterate over all of the symbols that are valid in the given state.
104 | //
105 | // For 'large' parse states, this just requires iterating through
106 | // all possible symbols and checking the parse table for each one.
107 | // For 'small' parse states, this exploits the structure of the
108 | // table to only visit the valid symbols. Advance with ts_lookahead_iterator__next.
109 | static inline LookaheadIterator ts_language_lookaheads(
110 |   const TSLanguage *self,
111 |   TSStateId state
112 | ) {
113 |   bool is_small_state = state >= self->large_state_count;
114 |   const uint16_t *data;
115 |   const uint16_t *group_end = NULL;
116 |   uint16_t group_count = 0;
117 |   if (is_small_state) {
118 |     uint32_t index = self->small_parse_table_map[state - self->large_state_count];
119 |     data = &self->small_parse_table[index];
120 |     group_end = data + 1;  // the first `next` call increments `data` onto this, which opens the first group
121 |     group_count = *data;
122 |   } else {
123 |     data = &self->parse_table[state * self->symbol_count] - 1;  // one before the row, because `next` increments before reading
124 |   }
125 |   return (LookaheadIterator) {
126 |     .language = self,
127 |     .data = data,
128 |     .group_end = group_end,
129 |     .group_count = group_count,
130 |     .is_small_state = is_small_state,
131 |     .symbol = UINT16_MAX,  // incremented before use for large states; presumably wraps to 0 (TSSymbol looks 16-bit — confirm)
132 |     .next_state = 0,
133 |   };
134 | }
135 | 
136 | static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {  // steps to the next symbol; false once the state is exhausted
137 |   // For small parse states, valid symbols are listed explicitly,
138 |   // grouped by their value. There's no need to look up the actions
139 |   // again until moving to the next group.
140 |   if (self->is_small_state) {
141 |     self->data++;
142 |     if (self->data == self->group_end) {
143 |       if (self->group_count == 0) return false;
144 |       self->group_count--;
145 |       self->table_value = *(self->data++);      // group header: the shared table value...
146 |       unsigned symbol_count = *(self->data++);  // ...followed by the number of symbols in the group
147 |       self->group_end = self->data + symbol_count;
148 |       self->symbol = *self->data;
149 |     } else {
150 |       self->symbol = *self->data;
151 |       return true;  // same group: the actions / next_state decoded earlier are left in place
152 |     }
153 |   }
154 | 
155 |   // For large parse states, iterate through every symbol until one
156 |   // is found that has valid actions.
157 |   else {
158 |     do {
159 |       self->data++;
160 |       self->symbol++;
161 |       if (self->symbol >= self->language->symbol_count) return false;
162 |       self->table_value = *self->data;
163 |     } while (!self->table_value);
164 |   }
165 | 
166 |   // Depending on whether the symbol is terminal or non-terminal, the table value either
167 |   // represents a list of actions or a successor state.
168 |   if (self->symbol < self->language->token_count) {
169 |     const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
170 |     self->action_count = entry->entry.count;
171 |     self->actions = (const TSParseAction *)(entry + 1);  // actions are stored right after the header entry
172 |     self->next_state = 0;
173 |   } else {
174 |     self->action_count = 0;
175 |     self->next_state = self->table_value;
176 |   }
177 |   return true;
178 | }
179 | 
180 | // Tells whether `state` is a "primary state", i.e. whether its entry in `primary_state_ids`
181 | // is the state itself. A false result means some other state behaves identically to this
182 | // one with respect to query analysis.
183 | static inline bool ts_language_state_is_primary(
184 |   const TSLanguage *self,
185 |   TSStateId state
186 | ) {
187 |   // Below the primary-states ABI version the table is not consulted;
188 |   // every state is reported as primary.
189 |   if (self->abi_version < LANGUAGE_VERSION_WITH_PRIMARY_STATES) return true;
190 |   return self->primary_state_ids[state] == state;
191 | }
192 | 
193 | // Returns the row of `external_scanner.states` that belongs to
194 | // `external_scanner_state`, or NULL for state 0.
195 | static inline const bool *ts_language_enabled_external_tokens(
196 |   const TSLanguage *self,
197 |   unsigned external_scanner_state
198 | ) {
199 |   if (external_scanner_state == 0) return NULL;
200 |   return &self->external_scanner.states[
201 |     self->external_token_count * external_scanner_state];
202 | }
203 | 
204 | // Returns the alias row for `production_id`, or NULL for production 0.
205 | static inline const TSSymbol *ts_language_alias_sequence(
206 |   const TSLanguage *self,
207 |   uint32_t production_id
208 | ) {
209 |   if (production_id == 0) return NULL;
210 |   return self->alias_sequences + production_id * self->max_alias_sequence_length;
211 | }
212 | 
213 | // Alias of child `child_index` in production `production_id`; 0 for production 0.
214 | static inline TSSymbol ts_language_alias_at(
215 |   const TSLanguage *self,
216 |   uint32_t production_id,
217 |   uint32_t child_index
218 | ) {
219 |   if (production_id == 0) return 0;
220 |   return self->alias_sequences[production_id * self->max_alias_sequence_length + child_index];
221 | }
222 | 
223 | // Writes the [*start, *end) field-map range of `production_id`; NULL/NULL without fields.
224 | static inline void ts_language_field_map(
225 |   const TSLanguage *self,
226 |   uint32_t production_id,
227 |   const TSFieldMapEntry **start,
228 |   const TSFieldMapEntry **end
229 | ) {
230 |   const TSFieldMapEntry *first = NULL, *past_last = NULL;
231 |   if (self->field_count != 0) {
232 |     TSMapSlice slice = self->field_map_slices[production_id];
233 |     first = self->field_map_entries + slice.index;
234 |     past_last = first + slice.length;
235 |   }
236 |   *start = first;
237 |   *end = past_last;
238 | }
239 | 
240 | static inline void ts_language_aliases_for_symbol(  // writes the [*start, *end) range of public symbols for `original_symbol`
241 |   const TSLanguage *self,
242 |   TSSymbol original_symbol,
243 |   const TSSymbol **start,
244 |   const TSSymbol **end
245 | ) {
246 |   *start = &self->public_symbol_map[original_symbol];  // default result: only the symbol's own public id
247 |   *end = *start + 1;
248 | 
249 |   unsigned idx = 0;
250 |   for (;;) {
251 |     TSSymbol symbol = self->alias_map[idx++];
252 |     if (symbol == 0 || symbol > original_symbol) break;  // a 0 or a larger id stops the scan; the default range is kept
253 |     uint16_t count = self->alias_map[idx++];
254 |     if (symbol == original_symbol) {
255 |       *start = &self->alias_map[idx];  // the `count` entries stored after this record's header
256 |       *end = &self->alias_map[idx + count];
257 |       break;
258 |     }
259 |     idx += count;  // skip this record's entries
260 |   }
261 | }
262 | 
263 | // Writes the name of `symbol` to `f` with these escapes applied:
264 | //   `"` and `\`  -> preceded by a backslash
265 | //   newline      -> the two characters `\n`
266 | //   tab          -> the two characters `\t`
267 | // Every other character is written unchanged.
268 | static inline void ts_language_write_symbol_as_dot_string(
269 |   const TSLanguage *self,
270 |   FILE *f,
271 |   TSSymbol symbol
272 | ) {
273 |   const char *cursor = ts_language_symbol_name(self, symbol);
274 |   while (*cursor) {
275 |     char c = *cursor++;
276 |     if (c == '"' || c == '\\') {
277 |       fputc('\\', f);
278 |       fputc(c, f);
279 |     } else if (c == '\n') {
280 |       fputs("\\n", f);
281 |     } else if (c == '\t') {
282 |       fputs("\\t", f);
283 |     } else {
284 |       fputc(c, f);
285 |     }
286 |   }
287 | }
288 | 
289 | #ifdef __cplusplus
290 | }
291 | #endif
292 | 
293 | #endif  // TREE_SITTER_LANGUAGE_H_
294 | 


--------------------------------------------------------------------------------
/src/language.c:
--------------------------------------------------------------------------------
  1 | #include "./language.h"
  2 | #include "./wasm_store.h"
  3 | #include "tree_sitter/api.h"
  4 | #include <string.h>
  5 | 
  6 | // Returns `self` unchanged; a non-NULL Wasm language is retained first.
  7 | const TSLanguage *ts_language_copy(const TSLanguage *self) {
  8 |   bool needs_retain = self != NULL && ts_language_is_wasm(self);
  9 |   if (needs_retain) ts_wasm_language_retain(self);
 10 |   return self;
 11 | }
 12 | 
 13 | // Releases a non-NULL Wasm language; does nothing for any other argument.
 14 | void ts_language_delete(const TSLanguage *self) {
 15 |   if (self == NULL || !ts_language_is_wasm(self)) return;
 16 |   ts_wasm_language_release(self);
 17 | }
 18 | 
 19 | uint32_t ts_language_symbol_count(const TSLanguage *self) {  // symbol_count plus alias_count
 20 |   return self->symbol_count + self->alias_count;
 21 | }
 22 | 
 23 | uint32_t ts_language_state_count(const TSLanguage *self) {  // returns the language's `state_count` field
 24 |   return self->state_count;
 25 | }
 26 | 
 27 | // Returns the language's supertype symbols and stores how many there are in
 28 | // `*length`. Below ABI version LANGUAGE_VERSION_WITH_RESERVED_WORDS the
 29 | // result is NULL with a length of 0.
 30 | const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) {
 31 |   bool has_supertypes = self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS;
 32 | 
 33 |   *length = has_supertypes ? self->supertype_count : 0;
 34 |   return has_supertypes ? self->supertype_symbols : NULL;
 35 | }
 36 | 
 37 | // Subtypes of `supertype`, count in `*length`; NULL/0 when the ABI is too old
 38 | // or the symbol's metadata does not flag it as a supertype.
 39 | const TSSymbol *ts_language_subtypes(
 40 |   const TSLanguage *self, TSSymbol supertype, uint32_t *length
 41 | ) {
 42 |   bool abi_has_supertypes = self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS;
 43 |   if (!abi_has_supertypes || !ts_language_symbol_metadata(self, supertype).supertype) {
 44 |     *length = 0;
 45 |     return NULL;
 46 |   }
 47 |   TSMapSlice entries = self->supertype_map_slices[supertype];
 48 |   *length = entries.length;
 49 |   return self->supertype_map_entries + entries.index;
 50 | }
 51 | 
 52 | uint32_t ts_language_version(const TSLanguage *self) {  // returns `abi_version`, the same value as ts_language_abi_version
 53 |   return self->abi_version;
 54 | }
 55 | 
 56 | uint32_t ts_language_abi_version(const TSLanguage *self) {  // returns the language's `abi_version` field
 57 |   return self->abi_version;
 58 | }
 59 | 
 60 | const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) {  // NULL below ABI version LANGUAGE_VERSION_WITH_RESERVED_WORDS
 61 |     return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? &self->metadata : NULL;
 62 | }
 63 | 
 64 | const char *ts_language_name(const TSLanguage *self) {  // NULL below ABI version LANGUAGE_VERSION_WITH_RESERVED_WORDS
 65 |   return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
 66 | }
 67 | 
 68 | uint32_t ts_language_field_count(const TSLanguage *self) {  // returns the language's `field_count` field
 69 |   return self->field_count;
 70 | }
 71 | 
 72 | // Fills `*result` with the parse actions recorded for `symbol` in `state`.
 73 | // The two built-in error symbols always get an empty, non-reusable entry.
 74 | void ts_language_table_entry(
 75 |   const TSLanguage *self,
 76 |   TSStateId state,
 77 |   TSSymbol symbol,
 78 |   TableEntry *result
 79 | ) {
 80 |   if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
 81 |     *result = (TableEntry) {.actions = NULL, .action_count = 0, .is_reusable = false};
 82 |     return;
 83 |   }
 84 |   ts_assert(symbol < self->token_count);
 85 |   // The looked-up table value indexes a header entry; its actions follow it.
 86 |   const TSParseActionEntry *header = &self->parse_actions[ts_language_lookup(self, state, symbol)];
 87 |   result->actions = (const TSParseAction *)(header + 1);
 88 |   result->action_count = header->entry.count;
 89 |   result->is_reusable = header->entry.reusable;
 90 | }
 91 | 
 92 | // Returns the lexing mode for `state`. Below LANGUAGE_VERSION_WITH_RESERVED_WORDS the
 93 | // `lex_modes` table is read as TSLexMode records and reserved_word_set_id is reported as 0.
 94 | TSLexerMode ts_language_lex_mode_for_state(
 95 |    const TSLanguage *self,
 96 |    TSStateId state
 97 | ) {
 98 |   if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) return self->lex_modes[state];
 99 |   // Older layout: reinterpret the table as TSLexMode[] before indexing.
100 |   TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state];
101 |   return (TSLexerMode) {
102 |     .lex_state = mode.lex_state,
103 |     .external_lex_state = mode.external_lex_state,
104 |     .reserved_word_set_id = 0,
105 |   };
106 | }
107 | 
108 | // Checks whether `symbol` is listed in the reserved-word set used by `state`.
109 | // Set id 0 never matches; other sets are scanned up to their first 0 entry.
110 | bool ts_language_is_reserved_word(
111 |   const TSLanguage *self, TSStateId state, TSSymbol symbol
112 | ) {
113 |   TSLexerMode mode = ts_language_lex_mode_for_state(self, state);
114 |   if (!(mode.reserved_word_set_id > 0)) return false;
115 | 
116 |   unsigned row_start = mode.reserved_word_set_id * self->max_reserved_word_set_size;
117 |   unsigned row_end = row_start + self->max_reserved_word_set_size;
118 |   for (unsigned i = row_start; i < row_end; i++) {
119 |     if (self->reserved_words[i] == symbol) return true;
120 |     if (self->reserved_words[i] == 0) break;
121 |   }
122 |   return false;
123 | }
124 | 
125 | TSSymbolMetadata ts_language_symbol_metadata(
126 |   const TSLanguage *self,
127 |   TSSymbol symbol
128 | ) {
129 |   if (symbol == ts_builtin_sym_error)  {
130 |     return (TSSymbolMetadata) {.visible = true, .named = true};
131 |   } else if (symbol == ts_builtin_sym_error_repeat) {
132 |     return (TSSymbolMetadata) {.visible = false, .named = false};
133 |   } else {
134 |     return self->symbol_metadata[symbol];
135 |   }
136 | }
137 | 
138 | TSSymbol ts_language_public_symbol(
139 |   const TSLanguage *self,
140 |   TSSymbol symbol
141 | ) {
142 |   if (symbol == ts_builtin_sym_error) return symbol;
143 |   return self->public_symbol_map[symbol];
144 | }
145 | 
146 | // Computes the state reached from `state` on `symbol`, or 0 if there is none.
147 | // Non-terminals read the successor straight from the parse table. Terminals use
148 | // their last listed action: a shift yields its target state (or `state` itself
149 | // for an extra), anything else yields 0. The error symbols always yield 0.
150 | TSStateId ts_language_next_state(
151 |   const TSLanguage *self,
152 |   TSStateId state,
153 |   TSSymbol symbol
154 | ) {
155 |   if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) return 0;
156 |   if (symbol >= self->token_count) return ts_language_lookup(self, state, symbol);
157 | 
158 |   uint32_t action_count;
159 |   const TSParseAction *actions = ts_language_actions(self, state, symbol, &action_count);
160 |   if (action_count == 0) return 0;
161 | 
162 |   const TSParseAction *last = &actions[action_count - 1];
163 |   if (last->type != TSParseActionTypeShift) return 0;
164 |   if (last->shift.extra) return state;
165 |   return last->shift.state;
166 | }
167 | 
168 | // Returns the name of `symbol`:
169 | //   - "ERROR" and "_ERROR" for the two built-in error symbols,
170 | //   - the `symbol_names` entry for ids below ts_language_symbol_count(),
171 | //   - NULL for anything else.
172 | const char *ts_language_symbol_name(
173 |   const TSLanguage *self,
174 |   TSSymbol symbol
175 | ) {
176 |   if (symbol == ts_builtin_sym_error) return "ERROR";
177 |   if (symbol == ts_builtin_sym_error_repeat) return "_ERROR";
178 | 
179 |   bool in_range = symbol < ts_language_symbol_count(self);
180 |   return in_range ? self->symbol_names[symbol] : NULL;
181 | }
182 | 
183 | // Finds the public symbol whose name is exactly the `length` bytes at `string` and whose
184 | // `named` flag equals `is_named`. "ERROR" maps to ts_builtin_sym_error; 0 means no match.
185 | TSSymbol ts_language_symbol_for_name(
186 |   const TSLanguage *self, const char *string, uint32_t length, bool is_named
187 | ) {
188 |   // Demand all five bytes: strncmp over `length` bytes alone also accepts "", "E", "ERR", ...
189 |   if (length == 5 && !strncmp(string, "ERROR", 5)) return ts_builtin_sym_error;
190 |   uint16_t count = (uint16_t)ts_language_symbol_count(self);
191 |   for (TSSymbol i = 0; i < count; i++) {
192 |     TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
193 |     if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
194 |     const char *symbol_name = self->symbol_names[i];
195 |     if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
196 |       return self->public_symbol_map[i];
197 |     }
198 |   }
199 |   return 0;
200 | }
201 | 
202 | // Classifies `symbol` from its metadata flags:
203 | //   visible and named  -> TSSymbolTypeRegular
204 | //   visible, not named -> TSSymbolTypeAnonymous
205 | //   hidden supertype   -> TSSymbolTypeSupertype
206 | //   anything else      -> TSSymbolTypeAuxiliary
207 | TSSymbolType ts_language_symbol_type(
208 |   const TSLanguage *self,
209 |   TSSymbol symbol
210 | ) {
211 |   TSSymbolMetadata flags = ts_language_symbol_metadata(self, symbol);
212 |   if (flags.visible) {
213 |     return flags.named ? TSSymbolTypeRegular : TSSymbolTypeAnonymous;
214 |   }
215 |   return flags.supertype ? TSSymbolTypeSupertype : TSSymbolTypeAuxiliary;
216 | }
217 | 
218 | // Name of field `id`. NULL is returned when the language has no fields or
219 | // when `id` is greater than the field count.
220 | const char *ts_language_field_name_for_id(
221 |   const TSLanguage *self,
222 |   TSFieldId id
223 | ) {
224 |   uint32_t field_count = ts_language_field_count(self);
225 |   if (field_count == 0 || id > field_count) return NULL;
226 | 
227 |   return self->field_names[id];
228 | }
229 | 
230 | TSFieldId ts_language_field_id_for_name(  // id of the field named exactly by the `name_length` bytes at `name`, or 0
231 |   const TSLanguage *self,
232 |   const char *name,
233 |   uint32_t name_length
234 | ) {
235 |   uint16_t count = (uint16_t)ts_language_field_count(self);
236 |   for (TSSymbol i = 1; i < count + 1; i++) {  // ids 1..count are examined; index 0 is never read
237 |     switch (strncmp(name, self->field_names[i], name_length)) {
238 |       case 0:
239 |         if (self->field_names[i][name_length] == 0) return i;  // the candidate must end here too, not merely start with `name`
240 |         break;
241 |       case -1:  // NOTE(review): strncmp only guarantees the sign of its result, so this exit fires only where it returns exactly -1; presumably relies on field_names being sorted — confirm
242 |         return 0;
243 |       default:
244 |         break;
245 |     }
246 |   }
247 |   return 0;
248 | }
249 | 
250 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
251 |   if (self->state_count <= state) return NULL;  // out-of-range state: nothing is allocated
252 |   LookaheadIterator *fresh = ts_malloc(sizeof *fresh);
253 |   *fresh = ts_language_lookaheads(self, state);  // positioned before the first symbol of `state`
254 |   return (TSLookaheadIterator *)fresh;
255 | }
256 | 
257 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {  // hands the iterator's memory to ts_free
258 |   ts_free(self);
259 | }
260 | 
261 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
262 |   LookaheadIterator *it = (LookaheadIterator *)self;
263 |   bool in_range = state < it->language->state_count;
264 |   if (in_range) *it = ts_language_lookaheads(it->language, state);  // same language, new state
265 |   return in_range;  // false leaves the iterator untouched
266 | }
267 | 
268 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {  // language the iterator was last set up with
269 |   const LookaheadIterator *iterator = (const LookaheadIterator *)self;
270 |   return iterator->language;
271 | }
272 | 
273 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
274 |   bool in_range = state < language->state_count;
275 |   LookaheadIterator *it = (LookaheadIterator *)self;
276 |   if (in_range) *it = ts_language_lookaheads(language, state);  // switch to `language` and `state`
277 |   return in_range;  // false leaves the iterator untouched
278 | }
279 | 
280 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {  // public wrapper around ts_lookahead_iterator__next
281 |   LookaheadIterator *iterator = (LookaheadIterator *)self;
282 |   return ts_lookahead_iterator__next(iterator);
283 | }
284 | 
285 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {  // returns the iterator's `symbol` field
286 |   const LookaheadIterator *iterator = (const LookaheadIterator *)self;
287 |   return iterator->symbol;
288 | }
289 | 
290 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {  // name of the current symbol via ts_language_symbol_name
291 |   const LookaheadIterator *iterator = (const LookaheadIterator *)self;
292 |   return ts_language_symbol_name(iterator->language, iterator->symbol);
293 | }
294 | 


--------------------------------------------------------------------------------
/src/array.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_ARRAY_H_
  2 | #define TREE_SITTER_ARRAY_H_
  3 | 
  4 | #ifdef __cplusplus
  5 | extern "C" {
  6 | #endif
  7 | 
  8 | #include "./alloc.h"
  9 | #include "./ts_assert.h"
 10 | 
 11 | #include <stdbool.h>
 12 | #include <stdint.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | 
 16 | #ifdef _MSC_VER
 17 | #pragma warning(push)
 18 | #pragma warning(disable : 4101)
 19 | #elif defined(__GNUC__) || defined(__clang__)
 20 | #pragma GCC diagnostic push
 21 | #pragma GCC diagnostic ignored "-Wunused-variable"
 22 | #endif
 23 | 
 24 | #define Array(T)       \
 25 |   struct {             \
 26 |     T *contents;       \
 27 |     uint32_t size;     \
 28 |     uint32_t capacity; \
 29 |   }
 30 | 
 31 | /// Initialize an array.
 32 | #define array_init(self) \
 33 |   ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
 34 | 
 35 | /// Create an empty array.
 36 | #define array_new() \
 37 |   { NULL, 0, 0 }
 38 | 
 39 | /// Get a pointer to the element at a given `index` in the array.
 40 | #define array_get(self, _index) \
 41 |   (ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
 42 | 
 43 | /// Get a pointer to the first element in the array.
 44 | #define array_front(self) array_get(self, 0)
 45 | 
 46 | /// Get a pointer to the last element in the array.
 47 | #define array_back(self) array_get(self, (self)->size - 1)
 48 | 
 49 | /// Clear the array, setting its size to zero. Note that this does not free any
 50 | /// memory allocated for the array's contents.
 51 | #define array_clear(self) ((self)->size = 0)
 52 | 
 53 | /// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
 54 | /// less than the array's current capacity, this function has no effect.
 55 | #define array_reserve(self, new_capacity) \
 56 |   _array__reserve((Array *)(self), array_elem_size(self), new_capacity)
 57 | 
 58 | /// Free the buffer allocated for this array. Note that this does not free any
 59 | /// memory owned by the individual elements (e.g. pointers stored in the array).
 60 | #define array_delete(self) _array__delete((Array *)(self))
 61 | 
 62 | /// Push a new `element` onto the end of the array.
 63 | #define array_push(self, element)                            \
 64 |   (_array__grow((Array *)(self), 1, array_elem_size(self)), \
 65 |    (self)->contents[(self)->size++] = (element))
 66 | 
 67 | /// Increase the array's size by `count` elements.
 68 | /// New elements are zero-initialized.
 69 | #define array_grow_by(self, count) \
 70 |   do { \
 71 |     if ((count) == 0) break; \
 72 |     _array__grow((Array *)(self), count, array_elem_size(self)); \
 73 |     memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
 74 |     (self)->size += (count); \
 75 |   } while (0)
 76 | 
 77 | /// Append all elements from one array to the end of another.
 78 | #define array_push_all(self, other)                                       \
 79 |   array_extend((self), (other)->size, (other)->contents)
 80 | 
 81 | /// Append `count` elements to the end of the array, reading their values from the
 82 | /// `contents` pointer.
 83 | #define array_extend(self, count, contents)                    \
 84 |   _array__splice(                                               \
 85 |     (Array *)(self), array_elem_size(self), (self)->size, \
 86 |     0, count,  contents                                        \
 87 |   )
 88 | 
 89 | /// Remove `old_count` elements from the array starting at the given `index`. At
 90 | /// the same index, insert `new_count` new elements, reading their values from the
 91 | /// `new_contents` pointer.
 92 | #define array_splice(self, _index, old_count, new_count, new_contents)  \
 93 |   _array__splice(                                                       \
 94 |     (Array *)(self), array_elem_size(self), _index,                \
 95 |     old_count, new_count, new_contents                                 \
 96 |   )
 97 | 
 98 | /// Insert one `element` into the array at the given `index`.
 99 | #define array_insert(self, _index, element) \
100 |   _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
101 | 
102 | /// Remove one element from the array at the given `index`.
103 | #define array_erase(self, _index) \
104 |   _array__erase((Array *)(self), array_elem_size(self), _index)
105 | 
106 | /// Pop the last element off the array, returning the element by value.
107 | #define array_pop(self) ((self)->contents[--(self)->size])
108 | 
109 | /// Assign the contents of one array to another, reallocating if necessary.
110 | #define array_assign(self, other) \
111 |   _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
112 | 
113 | /// Swap one array with another
114 | #define array_swap(self, other) \
115 |   _array__swap((Array *)(self), (Array *)(other))
116 | 
117 | /// Get the size in bytes of a single element of the array.
118 | #define array_elem_size(self) (sizeof *(self)->contents)
119 | 
120 | /// Search a sorted array for a given `needle` value, using the given `compare`
121 | /// callback to determine the order.
122 | ///
123 | /// If an existing element is found to be equal to `needle`, then the `index`
124 | /// out-parameter is set to the existing value's index, and the `exists`
125 | /// out-parameter is set to true. Otherwise, `index` is set to an index where
126 | /// `needle` should be inserted in order to preserve the sorting, and `exists`
127 | /// is set to false.
128 | #define array_search_sorted_with(self, compare, needle, _index, _exists) \
129 |   _array__search_sorted(self, 0, compare, , needle, _index, _exists)
130 | 
131 | /// Search a sorted array for a given `needle` value, using integer comparisons
132 | /// of a given struct field (specified with a leading dot) to determine the order.
133 | ///
134 | /// See also `array_search_sorted_with`.
135 | #define array_search_sorted_by(self, field, needle, _index, _exists) \
136 |   _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
137 | 
138 | /// Insert a given `value` into a sorted array, using the given `compare`
139 | /// callback to determine the order.
140 | #define array_insert_sorted_with(self, compare, value) \
141 |   do { \
142 |     unsigned _index, _exists; \
143 |     array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
144 |     if (!_exists) array_insert(self, _index, value); \
145 |   } while (0)
146 | 
147 | /// Insert a given `value` into a sorted array, using integer comparisons of
148 | /// a given struct field (specified with a leading dot) to determine the order.
149 | ///
150 | /// See also `array_search_sorted_by`.
151 | #define array_insert_sorted_by(self, field, value) \
152 |   do { \
153 |     unsigned _index, _exists; \
154 |     array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
155 |     if (!_exists) array_insert(self, _index, value); \
156 |   } while (0)
157 | 
158 | // Private
159 | 
160 | typedef Array(void) Array;
161 | 
162 | /// Backs `array_delete`: frees the contents buffer and resets the array to empty.
163 | /// An array without a buffer is left exactly as it is.
164 | static inline void _array__delete(Array *self) {
165 |   if (self->contents == NULL) return;
166 |   ts_free(self->contents);
167 |   self->capacity = 0;
168 |   self->size = 0;
169 |   self->contents = NULL;
170 | }
171 | 
172 | /// Backs `array_erase`: removes element `index` by sliding the tail of the
173 | /// array down by one slot. `index` must be in range.
174 | static inline void _array__erase(Array *self, size_t element_size, uint32_t index) {
175 |   ts_assert(index < self->size);
176 |   char *hole = (char *)self->contents + index * element_size;
177 |   uint32_t tail_count = self->size - index - 1;
178 |   memmove(hole, hole + element_size, tail_count * element_size);
179 |   self->size -= 1;
180 | }
181 | 
182 | /// Backs `array_reserve`: grows the contents buffer to hold `new_capacity`
183 | /// elements. Requests that do not exceed the current capacity change nothing.
184 | static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
185 |   if (new_capacity <= self->capacity) return;
186 |   size_t byte_count = new_capacity * element_size;
187 |   // An array with no buffer yet starts with ts_malloc; otherwise the buffer is ts_realloc'ed.
188 |   self->contents = self->contents
189 |     ? ts_realloc(self->contents, byte_count)
190 |     : ts_malloc(byte_count);
191 |   self->capacity = new_capacity;
192 | }
193 | 
194 | /// Backs `array_assign`: makes `self` hold a copy of `other`'s elements.
195 | static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
196 |   _array__reserve(self, element_size, other->size);
197 |   memcpy(self->contents, other->contents, other->size * element_size);
198 |   self->size = other->size;
199 | }
200 | 
201 | /// Backs `array_swap`: exchanges the contents, size and capacity of two arrays.
202 | static inline void _array__swap(Array *self, Array *other) {
203 |   Array held = *self;
204 |   *self = *other;
205 |   *other = held;
206 | }
207 | 
208 | /// Backs `array_push` / `array_grow_by`: makes room for `count` more elements. When the
209 | /// buffer must grow, the capacity requested is twice the old one, raised to at least 8 and the new size.
210 | static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
211 |   uint32_t needed = self->size + count;
212 |   if (needed <= self->capacity) return;
213 |   uint32_t target = self->capacity * 2;
214 |   if (target < 8) target = 8;
215 |   if (target < needed) target = needed;
216 |   _array__reserve(self, element_size, target);
217 | }
218 | 
219 | /// This is not what you're looking for, see `array_splice`. Replaces `old_count` elements at `index` with `new_count` elements copied from `elements`, or zero-filled when `elements` is NULL.
220 | static inline void _array__splice(Array *self, size_t element_size,
221 |                                  uint32_t index, uint32_t old_count,
222 |                                  uint32_t new_count, const void *elements) {
223 |   uint32_t new_size = self->size + new_count - old_count;
224 |   uint32_t old_end = index + old_count;  // one past the removed range
225 |   uint32_t new_end = index + new_count;  // one past the inserted range
226 |   ts_assert(old_end <= self->size);
227 | 
228 |   _array__reserve(self, element_size, new_size);  // may move the buffer, so `contents` is read afterwards
229 | 
230 |   char *contents = (char *)self->contents;
231 |   if (self->size > old_end) {  // move the elements that follow the removed range to their new position
232 |     memmove(
233 |       contents + new_end * element_size,
234 |       contents + old_end * element_size,
235 |       (self->size - old_end) * element_size
236 |     );
237 |   }
238 |   if (new_count > 0) {
239 |     if (elements) {
240 |       memcpy(
241 |         (contents + index * element_size),
242 |         elements,
243 |         new_count * element_size
244 |       );
245 |     } else {
246 |       memset(
247 |         (contents + index * element_size),
248 |         0,
249 |         new_count * element_size
250 |       );
251 |     }
252 |   }
253 |   self->size += new_count - old_count;
254 | }
255 | 
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from position `start` up to `self->size`.
/// `compare` is invoked as `compare(&element suffix, needle)`, where `suffix`
/// is pasted directly after the element expression, and must return an int
/// that is negative, zero, or positive.
///
/// Results are written through the two output pointers:
/// - `*_exists` is set to true only when an element compared equal to `needle`.
/// - `*_index` is the position of that element, or otherwise the position at
///   which `needle` would have to be inserted (the slot after the last element
///   that compared less than it). For an empty range it stays at `start`.
///
/// The macro evaluates its arguments more than once and declares locals named
/// `size`, `comparison`, `half_size` and `mid_index` inside its own
/// `do { ... } while (0)` scope.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)
276 | 
/// Helper macro for the `_sorted_by` routines. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro
/// `_array__search_sorted`, which passes a pointer to the stored element.
///
/// Both operands are cast to `int` and subtracted, so the result is negative,
/// zero, or positive when `*a` is less than, equal to, or greater than `b`.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
280 | 
281 | #ifdef _MSC_VER
282 | #pragma warning(pop)
283 | #elif defined(__GNUC__) || defined(__clang__)
284 | #pragma GCC diagnostic pop
285 | #endif
286 | 
287 | #ifdef __cplusplus
288 | }
289 | #endif
290 | 
291 | #endif  // TREE_SITTER_ARRAY_H_
292 | 


--------------------------------------------------------------------------------
/node.go:
--------------------------------------------------------------------------------
  1 | package tree_sitter
  2 | 
  3 | /*
  4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE
  5 | #include <tree_sitter/api.h>
  6 | */
  7 | import "C"
  8 | import "unsafe"
  9 | 
// Node is a single node within a syntax [Tree].
//
// Note that this is a C-compatible struct: its only field is the underlying
// C TSNode, stored by value.
type Node struct {
	// _inner is the wrapped C node handed to every ts_node_* call.
	_inner C.TSNode
}
 15 | 
 16 | func newNode(node C.TSNode) *Node {
 17 | 	if node.id == nil {
 18 | 		return nil
 19 | 	}
 20 | 	return &Node{_inner: node}
 21 | }
 22 | 
 23 | // Get a numeric id for this node that is unique.
 24 | //
 25 | // Within a given syntax tree, no two nodes have the same id. However, if
 26 | // a new tree is created based on an older tree, and a node from the old
 27 | // tree is reused in the process, then that node will have the same id in
 28 | // both trees.
 29 | func (n *Node) Id() uintptr {
 30 | 	return uintptr(n._inner.id)
 31 | }
 32 | 
 33 | // Get this node's type as a numerical id.
 34 | func (n *Node) KindId() uint16 {
 35 | 	return uint16(C.ts_node_symbol(n._inner))
 36 | }
 37 | 
 38 | // Get the node's type as a numerical id as it appears in the grammar
 39 | // ignoring aliases.
 40 | func (n *Node) GrammarId() uint16 {
 41 | 	return uint16(C.ts_node_grammar_symbol(n._inner))
 42 | }
 43 | 
 44 | // Get this node's type as a string.
 45 | func (n *Node) Kind() string {
 46 | 	return C.GoString(C.ts_node_type(n._inner))
 47 | }
 48 | 
 49 | // Get this node's symbol name as it appears in the grammar ignoring
 50 | // aliases as a string.
 51 | func (n *Node) GrammarName() string {
 52 | 	return C.GoString(C.ts_node_grammar_type(n._inner))
 53 | }
 54 | 
 55 | // Get the [Language] that was used to parse this node's syntax tree.
 56 | func (n *Node) Language() *Language {
 57 | 	return &Language{Inner: C.ts_node_language(n._inner)}
 58 | }
 59 | 
 60 | // Check if this node is *named*.
 61 | //
 62 | // Named nodes correspond to named rules in the grammar, whereas
 63 | // *anonymous* nodes correspond to string literals in the grammar.
 64 | func (n *Node) IsNamed() bool {
 65 | 	return bool(C.ts_node_is_named(n._inner))
 66 | }
 67 | 
 68 | // Check if this node is *extra*.
 69 | //
 70 | // Extra nodes represent things like comments, which are not required in the
 71 | // grammar, but can appear anywhere.
 72 | func (n *Node) IsExtra() bool {
 73 | 	return bool(C.ts_node_is_extra(n._inner))
 74 | }
 75 | 
 76 | // Check if this node has been edited.
 77 | func (n *Node) HasChanges() bool {
 78 | 	return bool(C.ts_node_has_changes(n._inner))
 79 | }
 80 | 
 81 | // Check if this node represents a syntax error or contains any syntax
 82 | // errors anywhere within it.
 83 | func (n *Node) HasError() bool {
 84 | 	return bool(C.ts_node_has_error(n._inner))
 85 | }
 86 | 
 87 | // Check if this node represents a syntax error.
 88 | //
 89 | // Syntax errors represent parts of the code that could not be incorporated
 90 | // into a valid syntax tree.
 91 | func (n *Node) IsError() bool {
 92 | 	return bool(C.ts_node_is_error(n._inner))
 93 | }
 94 | 
 95 | // Get this node's parse state.
 96 | func (n *Node) ParseState() uint16 {
 97 | 	return uint16(C.ts_node_parse_state(n._inner))
 98 | }
 99 | 
100 | // Get the parse state after this node.
101 | func (n *Node) NextParseState() uint16 {
102 | 	return uint16(C.ts_node_next_parse_state(n._inner))
103 | }
104 | 
105 | // Check if this node is *missing*.
106 | //
107 | // Missing nodes are inserted by the parser in order to recover from
108 | // certain kinds of syntax errors.
109 | func (n *Node) IsMissing() bool {
110 | 	return bool(C.ts_node_is_missing(n._inner))
111 | }
112 | 
113 | // Get the byte offsets where this node starts.
114 | func (n *Node) StartByte() uint {
115 | 	return uint(C.ts_node_start_byte(n._inner))
116 | }
117 | 
118 | // Get the byte offsets where this node end.
119 | func (n *Node) EndByte() uint {
120 | 	return uint(C.ts_node_end_byte(n._inner))
121 | }
122 | 
123 | // Get the byte range of source code that this node represents.
124 | func (n *Node) ByteRange() (uint, uint) {
125 | 	return n.StartByte(), n.EndByte()
126 | }
127 | 
128 | // Get the range of source code that this node represents, both in terms of
129 | // raw bytes and of row/column coordinates.
130 | func (n *Node) Range() Range {
131 | 	return Range{
132 | 		StartByte:  n.StartByte(),
133 | 		EndByte:    n.EndByte(),
134 | 		StartPoint: n.StartPosition(),
135 | 		EndPoint:   n.EndPosition(),
136 | 	}
137 | }
138 | 
139 | // Get this node's start position in terms of rows and columns.
140 | func (n *Node) StartPosition() Point {
141 | 	p := Point{}
142 | 	p.fromTSPoint(C.ts_node_start_point(n._inner))
143 | 	return p
144 | }
145 | 
146 | // Get this node's end position in terms of rows and columns.
147 | func (n *Node) EndPosition() Point {
148 | 	p := Point{}
149 | 	p.fromTSPoint(C.ts_node_end_point(n._inner))
150 | 	return p
151 | }
152 | 
153 | // Get the node's child at the given index, where zero represents the first
154 | // child.
155 | //
156 | // This method is fairly fast, but its cost is technically log(i), so if
157 | // you might be iterating over a long list of children, you should use
158 | // [Node.Children] instead.
159 | func (n *Node) Child(i uint) *Node {
160 | 	return newNode(C.ts_node_child(n._inner, C.uint(i)))
161 | }
162 | 
163 | // Get this node's number of children.
164 | func (n *Node) ChildCount() uint {
165 | 	return uint(C.ts_node_child_count(n._inner))
166 | }
167 | 
168 | // Get this node's *named* child at the given index.
169 | //
170 | // See also [Node.IsNamed].
171 | // This method is fairly fast, but its cost is technically log(i), so if
172 | // you might be iterating over a long list of children, you should use
173 | // [Node.NamedChildren] instead.
174 | func (n *Node) NamedChild(i uint) *Node {
175 | 	return newNode(C.ts_node_named_child(n._inner, C.uint(i)))
176 | }
177 | 
178 | // Get this node's number of *named* children.
179 | //
180 | // See also [Node.IsNamed].
181 | func (n *Node) NamedChildCount() uint {
182 | 	return uint(C.ts_node_named_child_count(n._inner))
183 | }
184 | 
185 | // Get the first child with the given field name.
186 | //
187 | // If multiple children may have the same field name, access them using
188 | // [Node.ChildrenByFieldName]
189 | func (n *Node) ChildByFieldName(fieldName string) *Node {
190 | 	cFieldName := C.CString(fieldName)
191 | 	defer go_free(unsafe.Pointer(cFieldName))
192 | 	return newNode(C.ts_node_child_by_field_name(n._inner, cFieldName, C.uint32_t(len(fieldName))))
193 | }
194 | 
195 | // Get this node's child with the given numerical field id.
196 | //
197 | // See also [Node.ChildByFieldName]. You can
198 | // convert a field name to an id using [Language.FieldIdForName].
199 | func (n *Node) ChildByFieldId(fieldId uint16) *Node {
200 | 	return newNode(C.ts_node_child_by_field_id(n._inner, C.uint16_t(fieldId)))
201 | }
202 | 
203 | // Get the field name of this node's child at the given index.
204 | func (n *Node) FieldNameForChild(childIndex uint32) string {
205 | 	ptr := C.ts_node_field_name_for_child(n._inner, C.uint32_t(childIndex))
206 | 	if ptr == nil {
207 | 		return ""
208 | 	}
209 | 	return C.GoString(ptr)
210 | }
211 | 
212 | // Get the field name of this node's named child at the given index.
213 | func (n *Node) FieldNameForNamedChild(namedChildIndex uint32) string {
214 | 	ptr := C.ts_node_field_name_for_named_child(n._inner, C.uint32_t(namedChildIndex))
215 | 	if ptr == nil {
216 | 		return ""
217 | 	}
218 | 	return C.GoString(ptr)
219 | }
220 | 
221 | // Iterate over this node's children.
222 | //
223 | // A [TreeCursor] is used to retrieve the children efficiently. Obtain
224 | // a [TreeCursor] by calling [Tree.Walk] or [Node.Walk]. To avoid
225 | // unnecessary allocations, you should reuse the same cursor for
226 | // subsequent calls to this method.
227 | //
228 | // If you're walking the tree recursively, you may want to use the
229 | // [TreeCursor] APIs directly instead.
230 | func (n *Node) Children(cursor *TreeCursor) []Node {
231 | 	cursor.Reset(*n)
232 | 	cursor.GotoFirstChild()
233 | 	childCount := n.ChildCount()
234 | 	result := make([]Node, 0, childCount)
235 | 	for i := 0; i < int(childCount); i++ {
236 | 		result = append(result, *cursor.Node())
237 | 		cursor.GotoNextSibling()
238 | 	}
239 | 	return result
240 | }
241 | 
242 | // Iterate over this node's named children.
243 | //
244 | // See also [Node.Children].
245 | func (n *Node) NamedChildren(cursor *TreeCursor) []Node {
246 | 	cursor.Reset(*n)
247 | 	cursor.GotoFirstChild()
248 | 	namedChildCount := n.NamedChildCount()
249 | 	result := make([]Node, 0, namedChildCount)
250 | 	for i := 0; i < int(namedChildCount); i++ {
251 | 		for !cursor.Node().IsNamed() {
252 | 			if !cursor.GotoNextSibling() {
253 | 				break
254 | 			}
255 | 		}
256 | 		result = append(result, *cursor.Node())
257 | 		cursor.GotoNextSibling()
258 | 	}
259 | 	return result
260 | }
261 | 
262 | // Iterate over this node's children with a given field name.
263 | //
264 | // See also [Node.Children].
265 | func (n *Node) ChildrenByFieldName(fieldName string, cursor *TreeCursor) []Node {
266 | 	fieldId := n.Language().FieldIdForName(fieldName)
267 | 	done := fieldId == 0
268 | 	if !done {
269 | 		cursor.Reset(*n)
270 | 		cursor.GotoFirstChild()
271 | 	}
272 | 	result := make([]Node, 0)
273 | 	for !done {
274 | 		for cursor.FieldId() != fieldId {
275 | 			if !cursor.GotoNextSibling() {
276 | 				return result
277 | 			}
278 | 		}
279 | 		result = append(result, *cursor.Node())
280 | 		if !cursor.GotoNextSibling() {
281 | 			done = true
282 | 		}
283 | 	}
284 | 	return result
285 | }
286 | 
287 | // Get this node's immediate parent.
288 | // Prefer [Node.ChildWithDescendant]
289 | // for iterating over this node's ancestors.
290 | func (n *Node) Parent() *Node {
291 | 	return newNode(C.ts_node_parent(n._inner))
292 | }
293 | 
294 | // Get the node that contains `descendant`.
295 | // Note that this can return `descendant` itself.
296 | func (n *Node) ChildWithDescendant(descendant *Node) *Node {
297 | 	return newNode(C.ts_node_child_with_descendant(n._inner, descendant._inner))
298 | }
299 | 
300 | // Get this node's next sibling.
301 | func (n *Node) NextSibling() *Node {
302 | 	return newNode(C.ts_node_next_sibling(n._inner))
303 | }
304 | 
305 | // Get this node's previous sibling.
306 | func (n *Node) PrevSibling() *Node {
307 | 	return newNode(C.ts_node_prev_sibling(n._inner))
308 | }
309 | 
310 | // Get this node's next named sibling.
311 | func (n *Node) NextNamedSibling() *Node {
312 | 	return newNode(C.ts_node_next_named_sibling(n._inner))
313 | }
314 | 
315 | // Get this node's previous named sibling.
316 | func (n *Node) PrevNamedSibling() *Node {
317 | 	return newNode(C.ts_node_prev_named_sibling(n._inner))
318 | }
319 | 
320 | // Get the node's first child that contains or starts after the given byte offset.
321 | func (n *Node) FirstChildForByte(byteOffset uint) *Node {
322 | 	return newNode(C.ts_node_first_child_for_byte(n._inner, C.uint(byteOffset)))
323 | }
324 | 
325 | // Get the node's first named child that contains or starts after the given byte offset.
326 | func (n *Node) FirstNamedChildForByte(byteOffset uint) *Node {
327 | 	return newNode(C.ts_node_first_named_child_for_byte(n._inner, C.uint(byteOffset)))
328 | }
329 | 
330 | // Get the node's number of descendants, including one for the node itself.
331 | func (n *Node) DescendantCount() uint {
332 | 	return uint(C.ts_node_descendant_count(n._inner))
333 | }
334 | 
335 | // Get the smallest node within this node that spans the given range.
336 | func (n *Node) DescendantForByteRange(start, end uint) *Node {
337 | 	return newNode(C.ts_node_descendant_for_byte_range(n._inner, C.uint(start), C.uint(end)))
338 | }
339 | 
340 | // Get the smallest named node within this node that spans the given range.
341 | func (n *Node) NamedDescendantForByteRange(start, end uint) *Node {
342 | 	return newNode(C.ts_node_named_descendant_for_byte_range(n._inner, C.uint(start), C.uint(end)))
343 | }
344 | 
345 | // Get the smallest node within this node that spans the given range.
346 | func (n *Node) DescendantForPointRange(start, end Point) *Node {
347 | 	return newNode(C.ts_node_descendant_for_point_range(n._inner, start.toTSPoint(), end.toTSPoint()))
348 | }
349 | 
350 | // Get the smallest named node within this node that spans the given range.
351 | func (n *Node) NamedDescendantForPointRange(start, end Point) *Node {
352 | 	return newNode(C.ts_node_named_descendant_for_point_range(n._inner, start.toTSPoint(), end.toTSPoint()))
353 | }
354 | 
355 | func (n *Node) ToSexp() string {
356 | 	cString := C.ts_node_string(n._inner)
357 | 	result := C.GoString(cString)
358 | 	go_free(unsafe.Pointer(cString))
359 | 	return result
360 | }
361 | 
362 | func (n *Node) Utf8Text(source []byte) string {
363 | 	return string(source[n.StartByte():n.EndByte()])
364 | }
365 | 
366 | func (n *Node) Utf16Text(source []uint16) []uint16 {
367 | 	return source[n.StartByte():n.EndByte()]
368 | }
369 | 
370 | // Create a new [TreeCursor] starting from this node.
371 | //
372 | // Note that the given node is considered the root of the cursor,
373 | // and the cursor cannot walk outside this node.
374 | func (n *Node) Walk() *TreeCursor {
375 | 	return newTreeCursor(*n)
376 | }
377 | 
378 | // Edit this node to keep it in-sync with source code that has been edited.
379 | //
380 | // This function is only rarely needed. When you edit a syntax tree with
381 | // the [Tree.Edit] method, all of the nodes that you retrieve from
382 | // the tree afterward will already reflect the edit. You only need to
383 | // use [Node.Edit] when you have a specific [Node] instance that
384 | // you want to keep and continue to use after an edit.
385 | func (n *Node) Edit(edit *InputEdit) {
386 | 	C.ts_node_edit(&n._inner, edit.toTSInputEdit())
387 | }
388 | 
389 | // Check if two nodes are identical.
390 | func (n *Node) Equals(other Node) bool {
391 | 	return bool(C.ts_node_eq(n._inner, other._inner))
392 | }
393 | 


--------------------------------------------------------------------------------
/src/subtree.h:
--------------------------------------------------------------------------------
  1 | #ifndef TREE_SITTER_SUBTREE_H_
  2 | #define TREE_SITTER_SUBTREE_H_
  3 | 
  4 | #ifdef __cplusplus
  5 | extern "C" {
  6 | #endif
  7 | 
  8 | #include <limits.h>
  9 | #include <stdbool.h>
 10 | #include <stdio.h>
 11 | #include "./length.h"
 12 | #include "./array.h"
 13 | #include "./error_costs.h"
 14 | #include "./host.h"
 15 | #include "tree_sitter/api.h"
 16 | #include "./parser.h"
 17 | 
// Sentinel parse-state value (the largest `unsigned short`).
// NOTE(review): presumably marks "no parse state"; confirm at the use sites.
#define TS_TREE_STATE_NONE USHRT_MAX
// A `Subtree` whose pointer member is NULL.
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
 20 | 
// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct {
  union {
    // Heap-allocated storage for long states.
    char *long_data;
    // Inline storage for short states (up to 24 bytes).
    char short_data[24];
  };
  // Presumably the number of serialized bytes, which would also decide which
  // union member is in use -- confirm against the implementation.
  uint32_t length;
} ExternalScannerState;
 38 | 
// A compact representation of a subtree.
//
// This representation is used for small leaf nodes that are not
// errors, and were not created by an external scanner.
//
// The idea behind the layout of this struct is that the `is_inline`
// bit will fall exactly into the same location as the least significant
// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
// Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct.
//
// Three field orders are declared below, selected by `TS_BIG_ENDIAN` and
// `TS_PTR_SIZE`; they contain the same fields and differ only in ordering.
typedef struct SubtreeInlineData SubtreeInlineData;

// The six one-bit flags shared by every layout. Together with `is_inline`
// (and `unused` in the big-endian layouts) they make up eight bit-fields.
#define SUBTREE_BITS    \
  bool visible : 1;     \
  bool named : 1;       \
  bool extra : 1;       \
  bool has_changes : 1; \
  bool is_missing : 1;  \
  bool is_keyword : 1;

// The compact padding/size fields shared by every layout. `padding_rows` and
// `lookahead_bytes` are 4-bit fields; the others are full bytes.
#define SUBTREE_SIZE           \
  uint8_t padding_columns;     \
  uint8_t padding_rows : 4;    \
  uint8_t lookahead_bytes : 4; \
  uint8_t padding_bytes;       \
  uint8_t size_bytes;

#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32

// Big-endian, 32-bit pointers: the flags (ending with `is_inline`) come after
// `parse_state` and `symbol`, and the size fields follow.
struct SubtreeInlineData {
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
  SUBTREE_SIZE
};

#else

// Big-endian, other pointer sizes: the size fields come first and
// `is_inline` is the last bit-field of the struct.
struct SubtreeInlineData {
  SUBTREE_SIZE
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
};

#endif
#else

// Little-endian: `is_inline` is the very first bit-field of the struct.
struct SubtreeInlineData {
  bool is_inline : 1;
  SUBTREE_BITS
  uint8_t symbol;
  uint16_t parse_state;
  SUBTREE_SIZE
};

#endif

#undef SUBTREE_BITS
#undef SUBTREE_SIZE
105 | 
// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
typedef struct {
  // Reference count. NOTE(review): presumably maintained by
  // `ts_subtree_retain` / `ts_subtree_release` -- confirm in subtree.c.
  volatile uint32_t ref_count;
  Length padding;
  Length size;
  uint32_t lookahead_bytes;
  uint32_t error_cost;
  // Number of children; zero for terminal subtrees (see the union below).
  uint32_t child_count;
  TSSymbol symbol;
  TSStateId parse_state;

  // One-bit flags. The inline representation carries only a subset of these;
  // the accessors further down return false for the others on inline subtrees.
  bool visible : 1;
  bool named : 1;
  bool extra : 1;
  bool fragile_left : 1;
  bool fragile_right : 1;
  bool has_changes : 1;
  bool has_external_tokens : 1;
  bool has_external_scanner_state_change : 1;
  bool depends_on_column: 1;
  bool is_missing : 1;
  bool is_keyword : 1;

  // Per-kind payload; which member is meaningful depends on the kind of
  // subtree, as noted on each member.
  union {
    // Non-terminal subtrees (`child_count > 0`)
    struct {
      uint32_t visible_child_count;
      uint32_t named_child_count;
      uint32_t visible_descendant_count;
      int32_t dynamic_precedence;
      uint16_t repeat_depth;
      uint16_t production_id;
      struct {
        TSSymbol symbol;
        TSStateId parse_state;
      } first_leaf;
    };

    // External terminal subtrees (`child_count == 0 && has_external_tokens`)
    ExternalScannerState external_scanner_state;

    // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
    int32_t lookahead_char;
  };
} SubtreeHeapData;
155 | 
// The fundamental building block of a syntax tree.
//
// Either an inline value (`data`, when `data.is_inline` is set) or a pointer
// to immutable heap data (`ptr`); the accessors below test `data.is_inline`
// to decide which member to read.
typedef union {
  SubtreeInlineData data;
  const SubtreeHeapData *ptr;
} Subtree;
161 | 
// Like Subtree, but mutable: `ptr` points to non-const heap data.
typedef union {
  SubtreeInlineData data;
  SubtreeHeapData *ptr;
} MutableSubtree;
167 | 
// Growable arrays of subtrees, built with the `Array` macro from array.h.
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;

// Scratch storage passed to the subtree allocation and release functions.
// NOTE(review): `free_trees` presumably caches released heap subtrees for
// reuse and `tree_stack` presumably serves as a work stack -- confirm in
// subtree.c.
typedef struct {
  MutableSubtreeArray free_trees;
  MutableSubtreeArray tree_stack;
} SubtreePool;
175 | 
// Operations on `ExternalScannerState`: initialise from a byte buffer, read
// the stored bytes, compare against a byte buffer, and release.
// NOTE(review): summary inferred from the names and signatures; only the
// declarations are visible in this header.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length);
const char *ts_external_scanner_state_data(const ExternalScannerState *self);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length);
void ts_external_scanner_state_delete(ExternalScannerState *self);
180 | 
// Helpers operating on whole `SubtreeArray`s. The ones that take a
// `SubtreePool` presumably release the contained subtrees through it --
// confirm in subtree.c.
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest);
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination);
void ts_subtree_array_reverse(SubtreeArray *self);

// Creation and destruction of a `SubtreePool`.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *self);
189 | 
// Constructors for leaf subtrees: a regular token and an error token.
// NOTE(review): which representation (inline or heap) is chosen is decided in
// the definitions, which are not visible here.
Subtree ts_subtree_new_leaf(
  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
  uint32_t lookahead_bytes, TSStateId parse_state,
  bool has_external_tokens, bool depends_on_column,
  bool is_keyword, const TSLanguage *language
);
Subtree ts_subtree_new_error(
  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
);
200 | MutableSubtree ts_subtree_new_node(
201 |   TSSymbol symbol,
202 |   SubtreeArray *chiildren,
203 |   unsigned production_id,
204 |   const TSLanguage *language
205 | );
// Constructors for an error node built from children, and for a "missing"
// leaf. NOTE(review): summary inferred from the names; see the definitions
// for exact behaviour.
Subtree ts_subtree_new_error_node(
  SubtreeArray *children,
  bool extra,
  const TSLanguage * language
);
Subtree ts_subtree_new_missing_leaf(
  SubtreePool *pool,
  TSSymbol symbol,
  Length padding,
  uint32_t lookahead_bytes,
  const TSLanguage *language
);
// Remaining subtree operations, defined outside this header: mutation and
// reference counting, comparison, restructuring, editing, debug output, and
// access to external scanner state.
// NOTE(review): grouping inferred from the names and signatures only.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self);
void ts_subtree_retain(Subtree self);
void ts_subtree_release(SubtreePool *pool, Subtree self);
int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool);
void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language);
void ts_subtree_compress(MutableSubtree self, unsigned count, const TSLanguage *language, MutableSubtreeArray *stack);
void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language);
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool);
char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all);
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f);
Subtree ts_subtree_last_external_token(Subtree tree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other);
231 | 
232 | #define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)
233 | 
234 | static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
235 | static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
236 | static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
237 | static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
238 | static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
239 | static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
240 | static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
241 | static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
242 | static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
243 | 
244 | #undef SUBTREE_GET
245 | 
246 | // Get the size needed to store a heap-allocated subtree with the given
247 | // number of children.
248 | static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
249 |   return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
250 | }
251 | 
// Get a subtree's children, which are allocated immediately before the
// tree's own heap data.
//
// Evaluates to NULL for inline subtrees. Otherwise it is the heap pointer,
// viewed as a `Subtree *`, moved back by `child_count` elements. The `self`
// argument is evaluated more than once.
#define ts_subtree_children(self) \
  ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
256 | 
257 | static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
258 |   if (self->data.is_inline) {
259 |     self->data.extra = is_extra;
260 |   } else {
261 |     self->ptr->extra = is_extra;
262 |   }
263 | }
264 | 
265 | static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
266 |   if (self.data.is_inline) return self.data.symbol;
267 |   if (self.ptr->child_count == 0) return self.ptr->symbol;
268 |   return self.ptr->first_leaf.symbol;
269 | }
270 | 
271 | static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
272 |   if (self.data.is_inline) return self.data.parse_state;
273 |   if (self.ptr->child_count == 0) return self.ptr->parse_state;
274 |   return self.ptr->first_leaf.parse_state;
275 | }
276 | 
277 | static inline Length ts_subtree_padding(Subtree self) {
278 |   if (self.data.is_inline) {
279 |     Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
280 |     return result;
281 |   } else {
282 |     return self.ptr->padding;
283 |   }
284 | }
285 | 
286 | static inline Length ts_subtree_size(Subtree self) {
287 |   if (self.data.is_inline) {
288 |     Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
289 |     return result;
290 |   } else {
291 |     return self.ptr->size;
292 |   }
293 | }
294 | 
295 | static inline Length ts_subtree_total_size(Subtree self) {
296 |   return length_add(ts_subtree_padding(self), ts_subtree_size(self));
297 | }
298 | 
299 | static inline uint32_t ts_subtree_total_bytes(Subtree self) {
300 |   return ts_subtree_total_size(self).bytes;
301 | }
302 | 
303 | static inline uint32_t ts_subtree_child_count(Subtree self) {
304 |   return self.data.is_inline ? 0 : self.ptr->child_count;
305 | }
306 | 
307 | static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
308 |   return self.data.is_inline ? 0 : self.ptr->repeat_depth;
309 | }
310 | 
311 | static inline uint32_t ts_subtree_is_repetition(Subtree self) {
312 |   return self.data.is_inline
313 |     ? 0
314 |     : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0;
315 | }
316 | 
317 | static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
318 |   return (self.data.is_inline || self.ptr->child_count == 0)
319 |     ? 0
320 |     : self.ptr->visible_descendant_count;
321 | }
322 | 
323 | static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
324 |   if (ts_subtree_child_count(self) > 0) {
325 |     return self.ptr->visible_child_count;
326 |   } else {
327 |     return 0;
328 |   }
329 | }
330 | 
331 | static inline uint32_t ts_subtree_error_cost(Subtree self) {
332 |   if (ts_subtree_missing(self)) {
333 |     return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
334 |   } else {
335 |     return self.data.is_inline ? 0 : self.ptr->error_cost;
336 |   }
337 | }
338 | 
339 | static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
340 |   return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
341 | }
342 | 
343 | static inline uint16_t ts_subtree_production_id(Subtree self) {
344 |   if (ts_subtree_child_count(self) > 0) {
345 |     return self.ptr->production_id;
346 |   } else {
347 |     return 0;
348 |   }
349 | }
350 | 
351 | static inline bool ts_subtree_fragile_left(Subtree self) {
352 |   return self.data.is_inline ? false : self.ptr->fragile_left;
353 | }
354 | 
355 | static inline bool ts_subtree_fragile_right(Subtree self) {
356 |   return self.data.is_inline ? false : self.ptr->fragile_right;
357 | }
358 | 
359 | static inline bool ts_subtree_has_external_tokens(Subtree self) {
360 |   return self.data.is_inline ? false : self.ptr->has_external_tokens;
361 | }
362 | 
363 | static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
364 |   return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change;
365 | }
366 | 
367 | static inline bool ts_subtree_depends_on_column(Subtree self) {
368 |   return self.data.is_inline ? false : self.ptr->depends_on_column;
369 | }
370 | 
371 | static inline bool ts_subtree_is_fragile(Subtree self) {
372 |   return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
373 | }
374 | 
375 | static inline bool ts_subtree_is_error(Subtree self) {
376 |   return ts_subtree_symbol(self) == ts_builtin_sym_error;
377 | }
378 | 
379 | static inline bool ts_subtree_is_eof(Subtree self) {
380 |   return ts_subtree_symbol(self) == ts_builtin_sym_end;
381 | }
382 | 
383 | static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
384 |   Subtree result;
385 |   result.data = self.data;
386 |   return result;
387 | }
388 | 
389 | static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
390 |   MutableSubtree result;
391 |   result.data = self.data;
392 |   return result;
393 | }
394 | 
395 | #ifdef __cplusplus
396 | }
397 | #endif
398 | 
399 | #endif  // TREE_SITTER_SUBTREE_H_
400 | 


--------------------------------------------------------------------------------
/src/unicode/umachine.h:
--------------------------------------------------------------------------------
  1 | // © 2016 and later: Unicode, Inc. and others.
  2 | // License & terms of use: http://www.unicode.org/copyright.html
  3 | /*
  4 | ******************************************************************************
  5 | *
  6 | *   Copyright (C) 1999-2015, International Business Machines
  7 | *   Corporation and others.  All Rights Reserved.
  8 | *
  9 | ******************************************************************************
 10 | *   file name:  umachine.h
 11 | *   encoding:   UTF-8
 12 | *   tab size:   8 (not used)
 13 | *   indentation:4
 14 | *
 15 | *   created on: 1999sep13
 16 | *   created by: Markus W. Scherer
 17 | *
 18 | *   This file defines basic types and constants for ICU to be
 19 | *   platform-independent. umachine.h and utf.h are included into
 20 | *   utypes.h to provide all the general definitions for ICU.
 21 | *   All of these definitions used to be in utypes.h before
 22 | *   the UTF-handling macros made this unmaintainable.
 23 | */
 24 | 
 25 | #ifndef __UMACHINE_H__
 26 | #define __UMACHINE_H__
 27 | 
 28 | 
 29 | /**
 30 |  * \file
 31 |  * \brief Basic types and constants for UTF
 32 |  *
 33 |  * <h2> Basic types and constants for UTF </h2>
 34 |  *   This file defines basic types and constants for utf.h to be
 35 |  *   platform-independent. umachine.h and utf.h are included into
 36 |  *   utypes.h to provide all the general definitions for ICU.
 37 |  *   All of these definitions used to be in utypes.h before
 38 |  *   the UTF-handling macros made this unmaintainable.
 39 |  *
 40 |  */
 41 | /*==========================================================================*/
 42 | /* Include platform-dependent definitions                                   */
 43 | /* which are contained in the platform-specific file platform.h             */
 44 | /*==========================================================================*/
 45 | 
 46 | #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
 47 | 
 48 | /*
 49 |  * ANSI C headers:
 50 |  * stddef.h defines wchar_t
 51 |  */
 52 | #include <stddef.h>
 53 | 
 54 | /*==========================================================================*/
 55 | /* For C wrappers, we use the symbol U_STABLE.                                */
 56 | /* This works properly if the includer is C or C++.                         */
 57 | /* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
 58 | /*==========================================================================*/
 59 | 
 60 | /**
 61 |  * \def U_CFUNC
 62 |  * This is used in a declaration of a library private ICU C function.
 63 |  * @stable ICU 2.4
 64 |  */
 65 | 
 66 | /**
 67 |  * \def U_CDECL_BEGIN
 68 |  * This is used to begin a declaration of a library private ICU C API.
 69 |  * @stable ICU 2.4
 70 |  */
 71 | 
 72 | /**
 73 |  * \def U_CDECL_END
 74 |  * This is used to end a declaration of a library private ICU C API
 75 |  * @stable ICU 2.4
 76 |  */
 77 | 
 78 | #ifdef __cplusplus
 79 | #   define U_CFUNC extern "C"
 80 | #   define U_CDECL_BEGIN extern "C" {
 81 | #   define U_CDECL_END   }
 82 | #else
 83 | #   define U_CFUNC extern
 84 | #   define U_CDECL_BEGIN
 85 | #   define U_CDECL_END
 86 | #endif
 87 | 
 88 | #ifndef U_ATTRIBUTE_DEPRECATED
 89 | /**
 90 |  * \def U_ATTRIBUTE_DEPRECATED
 91 |  *  This is used for GCC specific attributes
 92 |  * @internal
 93 |  */
 94 | #if U_GCC_MAJOR_MINOR >= 302
 95 | #    define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
 96 | /**
 97 |  * \def U_ATTRIBUTE_DEPRECATED
 98 |  * This is used for Visual C++ specific attributes
 99 |  * @internal
100 |  */
101 | #elif defined(_MSC_VER) && (_MSC_VER >= 1400)
102 | #    define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
103 | #else
104 | #    define U_ATTRIBUTE_DEPRECATED
105 | #endif
106 | #endif
107 | 
108 | /** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
109 | #define U_CAPI U_CFUNC U_EXPORT
110 | /** This is used to declare a function as a stable public ICU C API*/
111 | #define U_STABLE U_CAPI
112 | /** This is used to declare a function as a draft public ICU C API  */
113 | #define U_DRAFT  U_CAPI
114 | /** This is used to declare a function as a deprecated public ICU C API  */
115 | #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
116 | /** This is used to declare a function as an obsolete public ICU C API  */
117 | #define U_OBSOLETE U_CAPI
118 | /** This is used to declare a function as an internal ICU C API  */
119 | #define U_INTERNAL U_CAPI
120 | 
121 | /**
122 |  * \def U_OVERRIDE
123 |  * Defined to the C++11 "override" keyword if available.
124 |  * Denotes a class or member which is an override of the base class.
125 |  * May result in an error if it applied to something not an override.
126 |  * @internal
127 |  */
128 | #ifndef U_OVERRIDE
129 | #define U_OVERRIDE override
130 | #endif
131 | 
132 | /**
133 |  * \def U_FINAL
134 |  * Defined to the C++11 "final" keyword if available.
135 |  * Denotes a class or member which may not be overridden in subclasses.
136 |  * May result in an error if subclasses attempt to override.
137 |  * @internal
138 |  */
139 | #if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
140 | #define U_FINAL final
141 | #endif
142 | 
143 | // Before ICU 65, function-like, multi-statement ICU macros were just defined as
144 | // series of statements wrapped in { } blocks and the caller could choose to
145 | // either treat them as if they were actual functions and end the invocation
146 | // with a trailing ; creating an empty statement after the block or else omit
147 | // this trailing ; using the knowledge that the macro would expand to { }.
148 | //
149 | // But doing so doesn't work well with macros that look like functions and
150 | // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
151 | // switches to the standard solution of wrapping such macros in do { } while.
152 | //
153 | // This will however break existing code that depends on being able to invoke
154 | // these macros without a trailing ; so to be able to remain compatible with
155 | // such code the wrapper is itself defined as macros so that it's possible to
156 | // build ICU 65 and later with the old macro behaviour, like this:
157 | //
158 | // CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
159 | // runConfigureICU ...
160 | 
161 | /**
162 |  * \def UPRV_BLOCK_MACRO_BEGIN
163 |  * Defined as the "do" keyword by default.
164 |  * @internal
165 |  */
166 | #ifndef UPRV_BLOCK_MACRO_BEGIN
167 | #define UPRV_BLOCK_MACRO_BEGIN do
168 | #endif
169 | 
170 | /**
171 |  * \def UPRV_BLOCK_MACRO_END
172 |  * Defined as "while (FALSE)" by default.
173 |  * @internal
174 |  */
175 | #ifndef UPRV_BLOCK_MACRO_END
176 | #define UPRV_BLOCK_MACRO_END while (FALSE)
177 | #endif
178 | 
179 | /*==========================================================================*/
180 | /* limits for int32_t etc., like in POSIX inttypes.h                        */
181 | /*==========================================================================*/
182 | 
183 | #ifndef INT8_MIN
184 | /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
185 | #   define INT8_MIN        ((int8_t)(-128))
186 | #endif
187 | #ifndef INT16_MIN
188 | /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
189 | #   define INT16_MIN       ((int16_t)(-32767-1))
190 | #endif
191 | #ifndef INT32_MIN
192 | /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
193 | #   define INT32_MIN       ((int32_t)(-2147483647-1))
194 | #endif
195 | 
196 | #ifndef INT8_MAX
197 | /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
198 | #   define INT8_MAX        ((int8_t)(127))
199 | #endif
200 | #ifndef INT16_MAX
201 | /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
202 | #   define INT16_MAX       ((int16_t)(32767))
203 | #endif
204 | #ifndef INT32_MAX
205 | /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
206 | #   define INT32_MAX       ((int32_t)(2147483647))
207 | #endif
208 | 
209 | #ifndef UINT8_MAX
210 | /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
211 | #   define UINT8_MAX       ((uint8_t)(255U))
212 | #endif
213 | #ifndef UINT16_MAX
214 | /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
215 | #   define UINT16_MAX      ((uint16_t)(65535U))
216 | #endif
217 | #ifndef UINT32_MAX
218 | /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
219 | #   define UINT32_MAX      ((uint32_t)(4294967295U))
220 | #endif
221 | 
222 | #if defined(U_INT64_T_UNAVAILABLE)
223 | # error int64_t is required for decimal format and rule-based number format.
224 | #else
225 | # ifndef INT64_C
226 | /**
227 |  * Provides a platform independent way to specify a signed 64-bit integer constant.
228 |  * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
229 |  * @stable ICU 2.8
230 |  */
231 | #   define INT64_C(c) c ## LL
232 | # endif
233 | # ifndef UINT64_C
234 | /**
235 |  * Provides a platform independent way to specify an unsigned 64-bit integer constant.
236 |  * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
237 |  * @stable ICU 2.8
238 |  */
239 | #   define UINT64_C(c) c ## ULL
240 | # endif
241 | # ifndef U_INT64_MIN
242 | /** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
243 | #     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
244 | # endif
245 | # ifndef U_INT64_MAX
246 | /** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
247 | #     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
248 | # endif
249 | # ifndef U_UINT64_MAX
250 | /** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
251 | #     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
252 | # endif
253 | #endif
254 | 
255 | /*==========================================================================*/
256 | /* Boolean data type                                                        */
257 | /*==========================================================================*/
258 | 
259 | /** The ICU boolean type @stable ICU 2.0 */
260 | typedef int8_t UBool;
261 | 
262 | #ifndef TRUE
263 | /** The TRUE value of a UBool @stable ICU 2.0 */
264 | #   define TRUE  1
265 | #endif
266 | #ifndef FALSE
267 | /** The FALSE value of a UBool @stable ICU 2.0 */
268 | #   define FALSE 0
269 | #endif
270 | 
271 | 
272 | /*==========================================================================*/
273 | /* Unicode data types                                                       */
274 | /*==========================================================================*/
275 | 
276 | /* wchar_t-related definitions -------------------------------------------- */
277 | 
278 | /*
279 |  * \def U_WCHAR_IS_UTF16
280 |  * Defined if wchar_t uses UTF-16.
281 |  *
282 |  * @stable ICU 2.0
283 |  */
284 | /*
285 |  * \def U_WCHAR_IS_UTF32
286 |  * Defined if wchar_t uses UTF-32.
287 |  *
288 |  * @stable ICU 2.0
289 |  */
290 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
291 | #   ifdef __STDC_ISO_10646__
292 | #       if (U_SIZEOF_WCHAR_T==2)
293 | #           define U_WCHAR_IS_UTF16
294 | #       elif (U_SIZEOF_WCHAR_T==4)
295 | #           define  U_WCHAR_IS_UTF32
296 | #       endif
297 | #   elif defined __UCS2__
298 | #       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
299 | #           define U_WCHAR_IS_UTF16
300 | #       endif
301 | #   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
302 | #       if (U_SIZEOF_WCHAR_T==4)
303 | #           define U_WCHAR_IS_UTF32
304 | #       endif
305 | #   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
306 | #       define U_WCHAR_IS_UTF32
307 | #   elif U_PLATFORM_HAS_WIN32_API
308 | #       define U_WCHAR_IS_UTF16
309 | #   endif
310 | #endif
311 | 
312 | /* UChar and UChar32 definitions -------------------------------------------- */
313 | 
314 | /** Number of bytes in a UChar. @stable ICU 2.0 */
315 | #define U_SIZEOF_UCHAR 2
316 | 
317 | /**
318 |  * \def U_CHAR16_IS_TYPEDEF
319 |  * If 1, then char16_t is a typedef and not a real type (yet)
320 |  * @internal
321 |  */
322 | #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
323 | // for AIX, uchar.h needs to be included
324 | # include <uchar.h>
325 | # define U_CHAR16_IS_TYPEDEF 1
326 | #elif defined(_MSC_VER) && (_MSC_VER < 1900)
327 | // Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
328 | // and instead use a typedef.  https://msdn.microsoft.com/library/bb531344.aspx
329 | # define U_CHAR16_IS_TYPEDEF 1
330 | #else
331 | # define U_CHAR16_IS_TYPEDEF 0
332 | #endif
333 | 
334 | 
335 | /**
336 |  * \var UChar
337 |  *
338 |  * The base type for UTF-16 code units and pointers.
339 |  * Unsigned 16-bit integer.
340 |  * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
341 |  *
342 |  * UChar is configurable by defining the macro UCHAR_TYPE
343 |  * on the preprocessor or compiler command line:
344 |  * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
345 |  * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
346 |  * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
347 |  *
348 |  * The default is UChar=char16_t.
349 |  *
350 |  * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
351 |  *
352 |  * In C, char16_t is a simple typedef of uint_least16_t.
353 |  * ICU requires uint_least16_t=uint16_t for data memory mapping.
354 |  * On macOS, char16_t is not available because the uchar.h standard header is missing.
355 |  *
356 |  * @stable ICU 4.4
357 |  */
358 | 
359 | #if 1
360 |     // #if 1 is normal. UChar defaults to char16_t in C++.
361 |     // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
362 |     // The intltest Makefile #defines UCHAR_TYPE=char16_t,
363 |     // so we only #define it to uint16_t if it is undefined so far.
364 | #elif !defined(UCHAR_TYPE)
365 | #   define UCHAR_TYPE uint16_t
366 | #endif
367 | 
368 | #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
369 |         defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
370 |     // Inside the ICU library code, never configurable.
371 |     typedef char16_t UChar;
372 | #elif defined(UCHAR_TYPE)
373 |     typedef UCHAR_TYPE UChar;
374 | #elif defined(__cplusplus)
375 |     typedef char16_t UChar;
376 | #else
377 |     typedef uint16_t UChar;
378 | #endif
379 | 
380 | /**
381 |  * \var OldUChar
382 |  * Default ICU 58 definition of UChar.
383 |  * A base type for UTF-16 code units and pointers.
384 |  * Unsigned 16-bit integer.
385 |  *
386 |  * Define OldUChar to be wchar_t if that is 16 bits wide.
 * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ or, failing that, uint16_t.
388 |  *
389 |  * This makes the definition of OldUChar platform-dependent
390 |  * but allows direct string type compatibility with platforms with
391 |  * 16-bit wchar_t types.
392 |  *
393 |  * This is how UChar was defined in ICU 58, for transition convenience.
394 |  * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
395 |  * The current UChar responds to UCHAR_TYPE but OldUChar does not.
396 |  *
397 |  * @stable ICU 59
398 |  */
399 | #if U_SIZEOF_WCHAR_T==2
400 |     typedef wchar_t OldUChar;
401 | #elif defined(__CHAR16_TYPE__)
402 |     typedef __CHAR16_TYPE__ OldUChar;
403 | #else
404 |     typedef uint16_t OldUChar;
405 | #endif
406 | 
407 | /**
408 |  * Define UChar32 as a type for single Unicode code points.
409 |  * UChar32 is a signed 32-bit integer (same as int32_t).
410 |  *
411 |  * The Unicode code point range is 0..0x10ffff.
412 |  * All other values (negative or >=0x110000) are illegal as Unicode code points.
413 |  * They may be used as sentinel values to indicate "done", "error"
414 |  * or similar non-code point conditions.
415 |  *
416 |  * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
417 |  * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
418 |  * or else to be uint32_t.
419 |  * That is, the definition of UChar32 was platform-dependent.
420 |  *
421 |  * @see U_SENTINEL
422 |  * @stable ICU 2.4
423 |  */
424 | typedef int32_t UChar32;
425 | 
426 | /**
427 |  * This value is intended for sentinel values for APIs that
428 |  * (take or) return single code points (UChar32).
429 |  * It is outside of the Unicode code point range 0..0x10ffff.
430 |  *
431 |  * For example, a "done" or "error" value in a new API
432 |  * could be indicated with U_SENTINEL.
433 |  *
434 |  * ICU APIs designed before ICU 2.4 usually define service-specific "done"
435 |  * values, mostly 0xffff.
436 |  * Those may need to be distinguished from
437 |  * actual U+ffff text contents by calling functions like
438 |  * CharacterIterator::hasNext() or UnicodeString::length().
439 |  *
440 |  * @return -1
441 |  * @see UChar32
442 |  * @stable ICU 2.4
443 |  */
444 | #define U_SENTINEL (-1)
445 | 
446 | #include "unicode/urename.h"
447 | 
448 | #endif
449 | 


--------------------------------------------------------------------------------
/src/lexer.c:
--------------------------------------------------------------------------------
  1 | #include "./length.h"
  2 | #include "./lexer.h"
  3 | #include "./unicode.h"
  4 | 
  5 | #include "tree_sitter/api.h"
  6 | 
  7 | #include <stdarg.h>
  8 | #include <stdio.h>
  9 | 
// Log a lexing event together with a character. Does nothing unless a logger
// callback is installed. Characters in the printable ASCII range
// (32 <= character < 127) are shown with '%c'; anything else is shown as a
// decimal number. `message` must be a string literal, because it is
// concatenated with the format suffix at compile time. The macro expects a
// `self` lexer pointer in the enclosing scope and expands to a bare `if`
// block, so the call sites in this file omit the trailing semicolon.
#define LOG(message, character)              \
  if (self->logger.log) {                    \
    snprintf(                                \
      self->debug_buffer,                    \
      TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
      32 <= character && character < 127 ?   \
        message " character:'%c'" :          \
        message " character:%d",             \
      character                              \
    );                                       \
    self->logger.log(                        \
      self->logger.payload,                  \
      TSLogTypeLex,                          \
      self->debug_buffer                     \
    );                                       \
  }
 26 | 
// Code point of the Unicode byte order mark (U+FEFF). The lexer compares the
// decoded lookahead against this value when it is at byte offset 0.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;

// A range that starts at byte 0 / point (0, 0) and extends to the largest
// representable byte offset, row and column.
static const TSRange DEFAULT_RANGE = {
  .start_point = {
    .row = 0,
    .column = 0,
  },
  .end_point = {
    .row = UINT32_MAX,
    .column = UINT32_MAX,
  },
  .start_byte = 0,
  .end_byte = UINT32_MAX
};
 41 | 
 42 | /**
 43 |  * Sets the column data to the given value and marks it valid.
 44 |  * @param self The lexer state.
 45 |  * @param val The new value of the column data.
 46 |  */
 47 | static void ts_lexer__set_column_data(Lexer *self, uint32_t val) {
 48 |   self->column_data.valid = true;
 49 |   self->column_data.value = val;
 50 | }
 51 | 
 52 | /**
 53 |  * Increments the value of the column data; no-op if invalid.
 54 |  * @param self The lexer state.
 55 |  */
 56 | static void ts_lexer__increment_column_data(Lexer *self) {
 57 |   if (self->column_data.valid) {
 58 |     self->column_data.value++;
 59 |   }
 60 | }
 61 | 
 62 | /**
 63 |  * Marks the column data as invalid.
 64 |  * @param self The lexer state.
 65 |  */
 66 | static void ts_lexer__invalidate_column_data(Lexer *self) {
 67 |   self->column_data.valid = false;
 68 |   self->column_data.value = 0;
 69 | }
 70 | 
 71 | // Check if the lexer has reached EOF. This state is stored
 72 | // by setting the lexer's `current_included_range_index` such that
 73 | // it has consumed all of its available ranges.
 74 | static bool ts_lexer__eof(const TSLexer *_self) {
 75 |   Lexer *self = (Lexer *)_self;
 76 |   return self->current_included_range_index == self->included_range_count;
 77 | }
 78 | 
 79 | // Clear the currently stored chunk of source code, because the lexer's
 80 | // position has changed.
 81 | static void ts_lexer__clear_chunk(Lexer *self) {
 82 |   self->chunk = NULL;
 83 |   self->chunk_size = 0;
 84 |   self->chunk_start = 0;
 85 | }
 86 | 
 87 | // Call the lexer's input callback to obtain a new chunk of source code
 88 | // for the current position.
 89 | static void ts_lexer__get_chunk(Lexer *self) {
 90 |   self->chunk_start = self->current_position.bytes;
 91 |   self->chunk = self->input.read(
 92 |     self->input.payload,
 93 |     self->current_position.bytes,
 94 |     self->current_position.extent,
 95 |     &self->chunk_size
 96 |   );
 97 |   if (!self->chunk_size) {
 98 |     self->current_included_range_index = self->included_range_count;
 99 |     self->chunk = NULL;
100 |   }
101 | }
102 | 
// Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position.
//
// On return, `data.lookahead` holds the decoded code point (or
// TS_DECODE_ERROR) and `lookahead_size` holds the number of bytes that an
// advance will skip.
static void ts_lexer__get_lookahead(Lexer *self) {
  uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
  uint32_t size = self->chunk_size - position_in_chunk;

  // No bytes are available at the current position: report a NUL lookahead
  // with a size of one.
  if (size == 0) {
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
    return;
  }

  const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
  // Use the built-in decoder for the known encodings; for any other encoding
  // value, use the decode callback supplied with the input.
  DecodeFunction decode =
    self->input.encoding == TSInputEncodingUTF8    ? ts_decode_utf8     :
    self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le :
    self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode;

  self->lookahead_size = decode(chunk, size, &self->data.lookahead);

  // If this chunk ended in the middle of a multi-byte character,
  // try again with a fresh chunk.
  if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
    // The fresh chunk starts at the current position, so decoding restarts
    // from the beginning of that chunk.
    ts_lexer__get_chunk(self);
    chunk = (const uint8_t *)self->chunk;
    size = self->chunk_size;
    self->lookahead_size = decode(chunk, size, &self->data.lookahead);
  }

  // An undecodable sequence is consumed one byte at a time.
  if (self->data.lookahead == TS_DECODE_ERROR) {
    self->lookahead_size = 1;
  }
}
137 | 
// Move the lexer to `position`. If that position does not lie inside an
// included range, the lexer is moved forward to the start of the first
// non-empty included range that ends after it. If no such range exists, the
// lexer enters the EOF state at the end of the last included range.
//
// The cached column data is invalidated whenever the requested byte offset
// differs from the current one.
static void ts_lexer_goto(Lexer *self, Length position) {
  if (position.bytes != self->current_position.bytes) {
    ts_lexer__invalidate_column_data(self);
  }

  self->current_position = position;

  // Move to the first valid position at or after the given position.
  bool found_included_range = false;
  for (unsigned i = 0; i < self->included_range_count; i++) {
    TSRange *included_range = &self->included_ranges[i];
    // Skip ranges that end at or before the position, and empty ranges.
    if (
      included_range->end_byte > self->current_position.bytes &&
      included_range->end_byte > included_range->start_byte
    ) {
      // The position is at or before this range's start: snap to the start.
      if (included_range->start_byte >= self->current_position.bytes) {
        self->current_position = (Length) {
          .bytes = included_range->start_byte,
          .extent = included_range->start_point,
        };
      }

      self->current_included_range_index = i;
      found_included_range = true;
      break;
    }
  }

  if (found_included_range) {
    // If the current position is outside of the current chunk of text,
    // then clear out the current chunk of text.
    if (self->chunk && (
      self->current_position.bytes < self->chunk_start ||
      self->current_position.bytes >= self->chunk_start + self->chunk_size
    )) {
      ts_lexer__clear_chunk(self);
    }

    // A lookahead size of zero marks the lookahead as not yet decoded.
    self->lookahead_size = 0;
    self->data.lookahead = '\0';
  }

  // If the given position is beyond any of included ranges, move to the EOF
  // state - past the end of the included ranges.
  else {
    self->current_included_range_index = self->included_range_count;
    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
    self->current_position = (Length) {
      .bytes = last_included_range->end_byte,
      .extent = last_included_range->end_point,
    };
    ts_lexer__clear_chunk(self);
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
  }
}
194 | 
/**
 * Actually advances the lexer. Does not log anything.
 *
 * Consumes the current lookahead, updates the byte/row/column position and
 * the cached column data, steps over included-range boundaries, and then
 * decodes the next lookahead (fetching a new chunk if needed).
 *
 * @param self The lexer state.
 * @param skip Whether to mark the consumed codepoint as whitespace. When
 *             true, the token start position is moved to the position
 *             reached after advancing.
 */
static void ts_lexer__do_advance(Lexer *self, bool skip) {
  if (self->lookahead_size) {
    if (self->data.lookahead == '\n') {
      // A newline starts a new row at column zero.
      self->current_position.extent.row++;
      self->current_position.extent.column = 0;
      ts_lexer__set_column_data(self, 0);
    } else {
      // A byte order mark at byte offset 0 does not count towards the cached
      // column value, but its bytes are still added to the extent column.
      bool is_bom = self->current_position.bytes == 0 && 
        self->data.lookahead == BYTE_ORDER_MARK;
      if (!is_bom) ts_lexer__increment_column_data(self);
      self->current_position.extent.column += self->lookahead_size;
    }
    self->current_position.bytes += self->lookahead_size;
  }

  // Step over the end of the current included range, and over any empty
  // ranges, jumping to the start of the next range each time.
  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
  while (
    self->current_position.bytes >= current_range->end_byte ||
    current_range->end_byte == current_range->start_byte
  ) {
    if (self->current_included_range_index < self->included_range_count) {
      self->current_included_range_index++;
    }
    if (self->current_included_range_index < self->included_range_count) {
      current_range++;
      self->current_position = (Length) {
        current_range->start_byte,
        current_range->start_point,
      };
    } else {
      // All included ranges are consumed: the lexer is now at EOF.
      current_range = NULL;
      break;
    }
  }

  if (skip) self->token_start_position = self->current_position;

  if (current_range) {
    // Fetch a new chunk if the position has left the cached one, then
    // decode the next lookahead character.
    if (
      self->current_position.bytes < self->chunk_start ||
      self->current_position.bytes >= self->chunk_start + self->chunk_size
    ) {
      ts_lexer__get_chunk(self);
    }
    ts_lexer__get_lookahead(self);
  } else {
    // At EOF: drop the chunk and report a NUL lookahead.
    ts_lexer__clear_chunk(self);
    self->data.lookahead = '\0';
    self->lookahead_size = 1;
  }
}
251 | 
252 | // Advance to the next character in the source code, retrieving a new
253 | // chunk of source code if needed.
254 | static void ts_lexer__advance(TSLexer *_self, bool skip) {
255 |   Lexer *self = (Lexer *)_self;
256 |   if (!self->chunk) return;
257 | 
258 |   if (skip) {
259 |     LOG("skip", self->data.lookahead)
260 |   } else {
261 |     LOG("consume", self->data.lookahead)
262 |   }
263 | 
264 |   ts_lexer__do_advance(self, skip);
265 | }
266 | 
267 | // Mark that a token match has completed. This can be called multiple
268 | // times if a longer match is found later.
269 | static void ts_lexer__mark_end(TSLexer *_self) {
270 |   Lexer *self = (Lexer *)_self;
271 |   if (!ts_lexer__eof(&self->data)) {
272 |     // If the lexer is right at the beginning of included range,
273 |     // then the token should be considered to end at the *end* of the
274 |     // previous included range, rather than here.
275 |     TSRange *current_included_range = &self->included_ranges[
276 |       self->current_included_range_index
277 |     ];
278 |     if (
279 |       self->current_included_range_index > 0 &&
280 |       self->current_position.bytes == current_included_range->start_byte
281 |     ) {
282 |       TSRange *previous_included_range = current_included_range - 1;
283 |       self->token_end_position = (Length) {
284 |         previous_included_range->end_byte,
285 |         previous_included_range->end_point,
286 |       };
287 |       return;
288 |     }
289 |   }
290 |   self->token_end_position = self->current_position;
291 | }
292 | 
// Return the lexer's cached column value, recomputing it first if the cache
// is invalid. Also records in `did_get_column` that the column was queried.
//
// Recomputing rewinds the lexer to the start of the current line and
// re-advances it up to the original byte offset, letting the advance logic
// update the cached column along the way.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
  Lexer *self = (Lexer *)_self;

  self->did_get_column = true;

  if (!self->column_data.valid) {
    // Record current position
    uint32_t goal_byte = self->current_position.bytes;

    // Back up to the beginning of the line
    Length start_of_col = {
      self->current_position.bytes - self->current_position.extent.column,
      {self->current_position.extent.row, 0},
    };
    ts_lexer_goto(self, start_of_col);
    // ts_lexer_goto invalidates the column data when the byte offset
    // changes, so the cache is re-armed at zero only after the jump.
    ts_lexer__set_column_data(self, 0);
    ts_lexer__get_chunk(self);

    if (!ts_lexer__eof(_self)) {
      ts_lexer__get_lookahead(self);

      // Advance to the recorded position
      while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
        ts_lexer__do_advance(self, false);
        if (ts_lexer__eof(_self)) break;
      }
    }
  }

  return self->column_data.value;
}
324 | 
325 | // Is the lexer at a boundary between two disjoint included ranges of
326 | // source code? This is exposed as an API because some languages' external
327 | // scanners need to perform custom actions at these boundaries.
328 | static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
329 |   const Lexer *self = (const Lexer *)_self;
330 |   if (self->current_included_range_index < self->included_range_count) {
331 |     TSRange *current_range = &self->included_ranges[self->current_included_range_index];
332 |     return self->current_position.bytes == current_range->start_byte;
333 |   } else {
334 |     return false;
335 |   }
336 | }
337 | 
338 | static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) {
339 |   Lexer *self = (Lexer *)_self;
340 |   va_list args;
341 |   va_start(args, fmt);
342 |   if (self->logger.log) {
343 |     vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args);
344 |     self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer);
345 |   }
346 |   va_end(args);
347 | }
348 | 
349 | void ts_lexer_init(Lexer *self) {
350 |   *self = (Lexer) {
351 |     .data = {
352 |       // The lexer's methods are stored as struct fields so that generated
353 |       // parsers can call them without needing to be linked against this
354 |       // library.
355 |       .advance = ts_lexer__advance,
356 |       .mark_end = ts_lexer__mark_end,
357 |       .get_column = ts_lexer__get_column,
358 |       .is_at_included_range_start = ts_lexer__is_at_included_range_start,
359 |       .eof = ts_lexer__eof,
360 |       .log = ts_lexer__log,
361 |       .lookahead = 0,
362 |       .result_symbol = 0,
363 |     },
364 |     .chunk = NULL,
365 |     .chunk_size = 0,
366 |     .chunk_start = 0,
367 |     .current_position = {0, {0, 0}},
368 |     .logger = {
369 |       .payload = NULL,
370 |       .log = NULL
371 |     },
372 |     .included_ranges = NULL,
373 |     .included_range_count = 0,
374 |     .current_included_range_index = 0,
375 |     .did_get_column = false,
376 |     .column_data = {
377 |       .valid = false,
378 |       .value = 0
379 |     }
380 |   };
381 |   ts_lexer_set_included_ranges(self, NULL, 0);
382 | }
383 | 
// Release the lexer's included-ranges array. The `Lexer` struct itself is
// not freed here, and the pointer is not reset afterwards.
void ts_lexer_delete(Lexer *self) {
  ts_free(self->included_ranges);
}
387 | 
// Install a new input source. Any chunk cached from the previous input is
// dropped first, and the lexer is then re-positioned at its current position
// so that its range index and lookahead state are recomputed.
void ts_lexer_set_input(Lexer *self, TSInput input) {
  self->input = input;
  ts_lexer__clear_chunk(self);
  ts_lexer_goto(self, self->current_position);
}
393 | 
394 | // Move the lexer to the given position. This doesn't do any work
395 | // if the parser is already at the given position.
396 | void ts_lexer_reset(Lexer *self, Length position) {
397 |   if (position.bytes != self->current_position.bytes) {
398 |     ts_lexer_goto(self, position);
399 |   }
400 | }
401 | 
402 | // Begin a new token at the current position: reset the per-token state,
403 | // then, unless already at EOF, make sure a chunk and lookahead are loaded.
404 | void ts_lexer_start(Lexer *self) {
405 |   self->token_start_position = self->current_position;
406 |   self->token_end_position = LENGTH_UNDEFINED;
407 |   self->data.result_symbol = 0;
408 |   self->did_get_column = false;
409 |   if (ts_lexer__eof(&self->data)) return;
410 |   if (self->chunk_size == 0) ts_lexer__get_chunk(self);
411 |   if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);
412 |   if (self->current_position.bytes != 0) return;
413 | 
414 |   // At byte 0: advance past a leading byte order mark, then set column 0.
415 |   if (self->data.lookahead == BYTE_ORDER_MARK) ts_lexer__advance(&self->data, true);
416 |   ts_lexer__set_column_data(self, 0);
417 | }
418 | 
419 | // Finish the current token. If no end position was marked, it is marked
420 | // now. `*lookahead_end_byte` is raised (never lowered) so that it covers
421 | // the bytes that were looked at while scanning.
422 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
423 |   if (length_is_undefined(self->token_end_position)) {
424 |     ts_lexer__mark_end(&self->data);
425 |   }
426 | 
427 |   // A token that ends at an included range boundary has had its end
428 |   // position reset to the end of the preceding range, which can lie before
429 |   // the recorded start. Pull the start back so that it matches.
430 |   if (self->token_start_position.bytes > self->token_end_position.bytes) {
431 |     self->token_start_position = self->token_end_position;
432 |   }
433 | 
434 |   // Deciding that a byte sequence is invalid UTF8 or UTF16 may involve
435 |   // reading bytes after the current character, so those bytes affect how
436 |   // the current (invalid) character is interpreted. At most 4 are read.
437 |   uint32_t examined_end = self->current_position.bytes + 1;
438 |   if (self->data.lookahead == TS_DECODE_ERROR) examined_end += 4;
439 | 
440 |   // Only ever extend the caller's lookahead end.
441 |   if (*lookahead_end_byte < examined_end) {
442 |     *lookahead_end_byte = examined_end;
443 |   }
444 | }
445 | 
446 | // Thin wrapper that forwards to ts_lexer__mark_end, passing the lexer's
447 | // embedded `data` member.
448 | void ts_lexer_mark_end(Lexer *self) { ts_lexer__mark_end(&self->data); }
449 | 
450 | // Replace the lexer's included ranges with a copy of `ranges`, then re-seek
451 | // to the current position. NULL or a zero `count` selects DEFAULT_RANGE.
452 | // Returns false, changing nothing, if any range starts before the previous
453 | // range's end or ends before its own start.
454 | bool ts_lexer_set_included_ranges(
455 |   Lexer *self,
456 |   const TSRange *ranges,
457 |   uint32_t count
458 | ) {
459 |   if (!ranges || count == 0) {
460 |     ranges = &DEFAULT_RANGE;
461 |     count = 1;
462 |   } else {
463 |     uint32_t min_start = 0;
464 |     for (const TSRange *r = ranges; r != ranges + count; r++) {
465 |       if (r->start_byte < min_start || r->start_byte > r->end_byte) return false;
466 |       min_start = r->end_byte;
467 |     }
468 |   }
469 | 
470 |   size_t byte_size = count * sizeof(TSRange);
471 |   self->included_ranges = ts_realloc(self->included_ranges, byte_size);
472 |   memcpy(self->included_ranges, ranges, byte_size);
473 |   self->included_range_count = count;
474 |   ts_lexer_goto(self, self->current_position);
475 |   return true;
476 | }
477 | 
478 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {  // Read access to the included ranges.
479 |   *count = self->included_range_count;  // out-parameter: number of entries in the returned array
480 |   return self->included_ranges;  // the lexer's own array, not a copy
481 | }
482 | 
483 | #undef LOG
484 | 


--------------------------------------------------------------------------------
/src/get_changed_ranges.c:
--------------------------------------------------------------------------------
  1 | #include "./get_changed_ranges.h"
  2 | #include "./subtree.h"
  3 | #include "./language.h"
  4 | #include "./error_costs.h"
  5 | #include "./tree_cursor.h"
  6 | #include "./ts_assert.h"
  7 | 
  8 | // #define DEBUG_GET_CHANGED_RANGES
  9 | 
 10 | // Record the range [start, end) in `self`. If it starts at or before the
 11 | // end of the last stored range, that range's end is overwritten with `end`
 12 | // instead of adding a new entry. Otherwise the range is appended, but only
 13 | // when it is non-empty (start byte strictly before end byte).
 14 | static void ts_range_array_add(TSRangeArray *self, Length start, Length end) {
 15 |   TSRange *tail = self->size > 0 ? array_back(self) : NULL;
 16 |   // Extend the previous range when the new one touches or overlaps it.
 17 |   if (tail && start.bytes <= tail->end_byte) {
 18 |     tail->end_byte = end.bytes;
 19 |     tail->end_point = end.extent;
 20 |     return;
 21 |   }
 22 | 
 23 |   // Nothing to append for an empty or inverted range.
 24 |   if (start.bytes >= end.bytes) return;
 25 | 
 26 |   TSRange appended = { start.extent, end.extent, start.bytes, end.bytes };
 27 |   array_push(self, appended);
 28 | }
 29 | 
 30 | // Report whether the byte span [start_byte, end_byte) overlaps a range in
 31 | // `self` at or after `start_index`. Only the first range that ends after
 32 | // `start_byte` is examined; earlier-ending ranges are skipped.
 33 | bool ts_range_array_intersects(
 34 |   const TSRangeArray *self,
 35 |   unsigned start_index,
 36 |   uint32_t start_byte,
 37 |   uint32_t end_byte
 38 | ) {
 39 |   for (unsigned i = start_index; i < self->size; i++) {
 40 |     const TSRange *candidate = &self->contents[i];
 41 |     if (candidate->end_byte > start_byte) return candidate->start_byte < end_byte;
 42 |   }
 43 |   return false;
 44 | }
 45 | 
 46 | // Append to `differences` the stretches of bytes that are covered by
 47 | // exactly one of the two range lists.
 48 | //
 49 | // Both lists are swept together. Each step finds the next boundary of each
 50 | // list: the end of the range the sweep is inside, else the start of the
 51 | // next range, else LENGTH_MAX once the list is exhausted. The sweep moves to
 52 | // the nearer boundary and records the stretch it crossed if only one list
 53 | // covered it.
 54 | void ts_range_array_get_changed_ranges(
 55 |   const TSRange *old_ranges, unsigned old_range_count,
 56 |   const TSRange *new_ranges, unsigned new_range_count,
 57 |   TSRangeArray *differences
 58 | ) {
 59 |   unsigned old_index = 0, new_index = 0;
 60 |   bool in_old_range = false, in_new_range = false;
 61 |   Length current_position = length_zero();
 62 | 
 63 |   while (old_index < old_range_count || new_index < new_range_count) {
 64 |     const TSRange *old_range = &old_ranges[old_index];
 65 |     const TSRange *new_range = &new_ranges[new_index];
 66 | 
 67 |     // Next boundary in the old list.
 68 |     Length next_old_position = LENGTH_MAX;
 69 |     if (in_old_range) {
 70 |       next_old_position = (Length) {old_range->end_byte, old_range->end_point};
 71 |     } else if (old_index < old_range_count) {
 72 |       next_old_position = (Length) {old_range->start_byte, old_range->start_point};
 73 |     }
 74 | 
 75 |     // Next boundary in the new list.
 76 |     Length next_new_position = LENGTH_MAX;
 77 |     if (in_new_range) {
 78 |       next_new_position = (Length) {new_range->end_byte, new_range->end_point};
 79 |     } else if (new_index < new_range_count) {
 80 |       next_new_position = (Length) {new_range->start_byte, new_range->start_point};
 81 |     }
 82 | 
 83 |     // Pick the list(s) whose boundary comes first. On a tie both lists step
 84 |     // and the new list's boundary is used as the position.
 85 |     bool step_old = next_old_position.bytes <= next_new_position.bytes;
 86 |     bool step_new = next_new_position.bytes <= next_old_position.bytes;
 87 |     Length boundary = step_new ? next_new_position : next_old_position;
 88 | 
 89 |     if (in_old_range != in_new_range) {
 90 |       ts_range_array_add(differences, current_position, boundary);
 91 |     }
 92 | 
 93 |     // Crossing the end of a range moves that list on to its next range.
 94 |     if (step_old) {
 95 |       if (in_old_range) old_index++;
 96 |       in_old_range = !in_old_range;
 97 |     }
 98 |     if (step_new) {
 99 |       if (in_new_range) new_index++;
100 |       in_new_range = !in_new_range;
101 |     }
102 |     current_position = boundary;
103 |   }
104 | }
105 | 
106 | typedef struct {  // Traversal state for one of the two trees being compared.
107 |   TreeCursor cursor;  // its stack holds the entries from the root down to the current node
108 |   const TSLanguage *language;  // passed to ts_language_alias_at for alias lookups
109 |   unsigned visible_depth;  // incremented when a visible node is entered, decremented when it is left
110 |   bool in_padding;  // true while positioned on the current node's padding rather than its content
111 | } Iterator;
112 | 
113 | // Build an iterator rooted at `tree`. `cursor`'s stack is cleared and seeded
114 | // with one entry for `tree` at position zero; the cursor struct is then copied.
115 | static Iterator iterator_new(
116 |   TreeCursor *cursor,
117 |   const Subtree *tree,
118 |   const TSLanguage *language
119 | ) {
120 |   TreeCursorEntry root_entry = {
121 |     .subtree = tree,
122 |     .position = length_zero(),
123 |     .child_index = 0,
124 |     .structural_child_index = 0,
125 |   };
126 |   array_clear(&cursor->stack);
127 |   array_push(&cursor->stack, root_entry);
128 |   Iterator created = { .cursor = *cursor, .language = language, .visible_depth = 1 };
129 |   created.in_padding = false;
130 |   return created;
131 | }
132 | 
133 | // True once the cursor stack is empty, i.e. the last entry has been
134 | // popped by iterator_advance or iterator_ascend.
135 | static bool iterator_done(Iterator *self) { return !self->cursor.stack.size; }
136 | 
137 | // Start of the span the iterator currently stands on: the entry's position
138 | // while in the node's padding, otherwise the position advanced past that
139 | // padding.
140 | static Length iterator_start_position(Iterator *self) {
141 |   const TreeCursorEntry *top = array_back(&self->cursor.stack);
142 |   if (self->in_padding) return top->position;
143 |   return length_add(top->position, ts_subtree_padding(*top->subtree));
144 | }
145 | 
146 | // End of the span the iterator currently stands on: the end of the node's
147 | // padding while in padding, otherwise the end of the node's content.
148 | static Length iterator_end_position(Iterator *self) {
149 |   const TreeCursorEntry *top = array_back(&self->cursor.stack);
150 |   Length padding_end = length_add(top->position, ts_subtree_padding(*top->subtree));
151 |   return self->in_padding
152 |     ? padding_end
153 |     : length_add(padding_end, ts_subtree_size(*top->subtree));
154 | }
155 | 
156 | // Whether the current node counts as visible: its subtree is visible, or
157 | // (when it has a parent entry) the parent's production aliases this child.
158 | static bool iterator_tree_is_visible(const Iterator *self) {
159 |   const TreeCursorEntry *top = array_back(&self->cursor.stack);
160 |   if (ts_subtree_visible(*top->subtree)) return true;
161 |   if (self->cursor.stack.size <= 1) return false;
162 | 
163 |   const Subtree *parent = (top - 1)->subtree;
164 |   TSSymbol alias = ts_language_alias_at(
165 |     self->language, parent->ptr->production_id, top->structural_child_index
166 |   );
167 |   return alias != 0;
168 | }
169 | 
170 | // Search the cursor stack from the current entry toward the root for the
171 | // nearest entry that is visible or aliased by its parent, and report its
172 | // subtree, alias and position bytes through the out-parameters. While in
173 | // padding the search starts at the parent entry. When there is no such
174 | // parent, or no entry qualifies, `*tree` and `*start_byte` are not written.
175 | static void iterator_get_visible_state(
176 |   const Iterator *self,
177 |   Subtree *tree,
178 |   TSSymbol *alias_symbol,
179 |   uint32_t *start_byte
180 | ) {
181 |   const TreeCursorEntry *entries = self->cursor.stack.contents;
182 |   uint32_t remaining = self->cursor.stack.size;
183 |   if (self->in_padding) {
184 |     if (remaining == 1) return;
185 |     remaining--;
186 |   }
187 |   while (remaining > 0) {
188 |     const TreeCursorEntry *entry = &entries[--remaining];
189 |     if (remaining > 0) {
190 |       const Subtree *parent = entries[remaining - 1].subtree;
191 |       *alias_symbol = ts_language_alias_at(
192 |         self->language, parent->ptr->production_id, entry->structural_child_index
193 |       );
194 |     }
195 |     if (ts_subtree_visible(*entry->subtree) || *alias_symbol) {
196 |       *tree = *entry->subtree;
197 |       *start_byte = entry->position.bytes;
198 |       return;
199 |     }
200 |   }
201 | }
202 | 
203 | static void iterator_ascend(Iterator *self) {  // Pop the current entry so that its parent becomes current.
204 |   if (iterator_done(self)) return;  // empty stack: nothing to pop
205 |   if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;  // a node still in padding was never counted
206 |   if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;  // padding state survives only when leaving a first child
207 |   self->cursor.stack.size--;
208 | }
209 | 
210 | // Descend toward byte offset `goal_position`, repeatedly pushing the first
211 | // child whose content ends after the goal. On reaching a visible child,
212 | // enter it (or its padding, if its content starts after the goal) and
213 | // return true. Returns false if already in padding or if no visible child
214 | // is found; invisible entries pushed along the way stay on the stack.
215 | static bool iterator_descend(Iterator *self, uint32_t goal_position) {
216 |   if (self->in_padding) return false;
217 | 
218 |   for (bool pushed_invisible = true; pushed_invisible;) {
219 |     pushed_invisible = false;
220 |     TreeCursorEntry parent = *array_back(&self->cursor.stack);
221 |     Length child_start = parent.position;
222 |     uint32_t structural_index = 0;
223 |     uint32_t child_count = ts_subtree_child_count(*parent.subtree);
224 | 
225 |     for (uint32_t i = 0; i < child_count; i++) {
226 |       const Subtree *child = &ts_subtree_children(*parent.subtree)[i];
227 |       Length content_start = length_add(child_start, ts_subtree_padding(*child));
228 |       Length content_end = length_add(content_start, ts_subtree_size(*child));
229 |       // A child that ends at or before the goal is skipped.
230 |       if (content_end.bytes <= goal_position) {
231 |         child_start = content_end;
232 |         if (!ts_subtree_extra(*child)) structural_index++;
233 |         continue;
234 |       }
235 | 
236 |       array_push(&self->cursor.stack, ((TreeCursorEntry) {
237 |         .subtree = child,
238 |         .position = child_start,
239 |         .child_index = i,
240 |         .structural_child_index = structural_index,
241 |       }));
242 |       pushed_invisible = !iterator_tree_is_visible(self);
243 |       if (pushed_invisible) break;
244 | 
245 |       if (content_start.bytes > goal_position) self->in_padding = true;
246 |       else self->visible_depth++;
247 |       return true;
248 |     }
249 |   }
250 |   return false;
251 | }
252 | 
253 | // Move the iterator to the next span. In padding: step onto the node
254 | // itself, descending further if it is not visible. Otherwise: pop entries
255 | // until one has a following sibling and push that sibling (starting in its
256 | // padding when it is visible and has any); the stack ends up empty when no
257 | // sibling remains.
258 | static void iterator_advance(Iterator *self) {
259 |   if (self->in_padding) {
260 |     self->in_padding = false;
261 |     if (!iterator_tree_is_visible(self)) {
262 |       iterator_descend(self, 0);
263 |     } else {
264 |       self->visible_depth++;
265 |     }
266 |     return;
267 |   }
268 | 
269 |   while (true) {
270 |     if (iterator_tree_is_visible(self)) self->visible_depth--;
271 |     TreeCursorEntry left = array_pop(&self->cursor.stack);
272 |     if (iterator_done(self)) return;
273 | 
274 |     const Subtree *parent = array_back(&self->cursor.stack)->subtree;
275 |     uint32_t sibling_index = left.child_index + 1;
276 |     if (sibling_index >= ts_subtree_child_count(*parent)) continue;
277 | 
278 |     const Subtree *sibling = &ts_subtree_children(*parent)[sibling_index];
279 |     array_push(&self->cursor.stack, ((TreeCursorEntry) {
280 |       .subtree = sibling,
281 |       .position = length_add(left.position, ts_subtree_total_size(*left.subtree)),
282 |       .child_index = sibling_index,
283 |       .structural_child_index =
284 |         left.structural_child_index + (ts_subtree_extra(*left.subtree) ? 0 : 1),
285 |     }));
286 | 
287 |     if (!iterator_tree_is_visible(self)) {
288 |       iterator_descend(self, 0);
289 |     } else if (ts_subtree_padding(*sibling).bytes > 0) {
290 |       self->in_padding = true;
291 |     } else {
292 |       self->visible_depth++;
293 |     }
294 |     return;
295 |   }
296 | }
297 | 
298 | typedef enum {  // Outcome of iterator_compare for the two iterators' current nodes.
299 |   IteratorDiffers,  // only one side has a node, or alias/symbol disagree
300 |   IteratorMayDiffer,  // alias and symbol agree, but the nodes are not proven identical
301 |   IteratorMatches,  // the nodes are treated as identical (or neither side has a node)
302 | } IteratorComparison;
303 | 
304 | // Compare the nearest visible-or-aliased nodes of the two iterators.
305 | // Matches:   neither iterator has such a node, or every identity test
306 | //            in the body passes.
307 | // MayDiffer: alias and symbol agree, but an identity test fails.
308 | // Differs:   only one iterator has a node, or alias or symbol disagree.
309 | static IteratorComparison iterator_compare(
310 |   const Iterator *old_iter,
311 |   const Iterator *new_iter
312 | ) {
313 |   Subtree old_tree = NULL_SUBTREE, new_tree = NULL_SUBTREE;
314 |   TSSymbol old_alias_symbol = 0, new_alias_symbol = 0;
315 |   uint32_t old_start = 0, new_start = 0;
316 |   iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
317 |   iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
318 | 
319 |   if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
320 |   if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
321 |   if (
322 |     old_alias_symbol != new_alias_symbol ||
323 |     ts_subtree_symbol(old_tree) != ts_subtree_symbol(new_tree)
324 |   ) return IteratorDiffers;
325 | 
326 |   // Identity tests: same start and byte size, no changes in the old node,
327 |   // not an error node, known parse states that agree on being ERROR_STATE.
328 |   bool old_in_error = ts_subtree_parse_state(old_tree) == ERROR_STATE;
329 |   bool new_in_error = ts_subtree_parse_state(new_tree) == ERROR_STATE;
330 |   bool identical =
331 |     old_start == new_start &&
332 |     !ts_subtree_has_changes(old_tree) &&
333 |     ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
334 |     ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
335 |     ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
336 |     ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
337 |     old_in_error == new_in_error;
338 |   return identical ? IteratorMatches : IteratorMayDiffer;
339 | }
340 | 
341 | #ifdef DEBUG_GET_CHANGED_RANGES
342 | // Debug only: print the current node's symbol name, a "(p)" marker while in
343 | // padding, the visible depth, and the start/end points (rows as row + 1).
344 | static inline void iterator_print_state(Iterator *self) {
345 |   const Subtree *node = array_back(&self->cursor.stack)->subtree;
346 |   const char *padding_marker = self->in_padding ? "(p)" : "   ";
347 |   TSPoint from = iterator_start_position(self).extent;
348 |   TSPoint to = iterator_end_position(self).extent;
349 |   printf(
350 |     "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
351 |     ts_language_symbol_name(self->language, ts_subtree_symbol(*node)), padding_marker,
352 |     self->visible_depth, from.row + 1, from.column, to.row + 1, to.column
353 |   );
354 | }
355 | #endif
356 | 
357 | unsigned ts_subtree_get_changed_ranges(  // Returns the number of ranges stored in `*ranges`.
358 |   const Subtree *old_tree, const Subtree *new_tree,  // the two trees to compare
359 |   TreeCursor *cursor1, TreeCursor *cursor2,  // their stacks are cleared and used for the traversal, then written back
360 |   const TSLanguage *language,  // used for alias lookups during the traversal
361 |   const TSRangeArray *included_range_differences,  // spans in which matching nodes must still be inspected internally
362 |   TSRange **ranges  // out: the contents of the result array - NOTE(review): ownership presumably passes to the caller; confirm
363 | ) {
364 |   TSRangeArray results = array_new();
365 | 
366 |   Iterator old_iter = iterator_new(cursor1, old_tree, language);
367 |   Iterator new_iter = iterator_new(cursor2, new_tree, language);
368 | 
369 |   unsigned included_range_difference_index = 0;
370 |   // If the two trees' content starts at different offsets, the gap between the two starts is a change.
371 |   Length position = iterator_start_position(&old_iter);
372 |   Length next_position = iterator_start_position(&new_iter);
373 |   if (position.bytes < next_position.bytes) {
374 |     ts_range_array_add(&results, position, next_position);
375 |     position = next_position;
376 |   } else if (position.bytes > next_position.bytes) {
377 |     ts_range_array_add(&results, next_position, position);
378 |     next_position = position;
379 |   }
380 |   // Walk both trees in lockstep until either iterator runs out of nodes.
381 |   do {
382 |     #ifdef DEBUG_GET_CHANGED_RANGES
383 |     printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
384 |     iterator_print_state(&old_iter);
385 |     printf("\tvs\t");
386 |     iterator_print_state(&new_iter);
387 |     puts("");
388 |     #endif
389 | 
390 |     // Compare the nodes the two iterators currently stand on.
391 |     IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
392 | 
393 |     // Even if the two subtrees appear to be identical, they could differ
394 |     // internally if they contain a range of text that was previously
395 |     // excluded from the parse, and is now included, or vice-versa.
396 |     if (comparison == IteratorMatches && ts_range_array_intersects(
397 |       included_range_differences,
398 |       included_range_difference_index,
399 |       position.bytes,
400 |       iterator_end_position(&old_iter).bytes
401 |     )) {
402 |       comparison = IteratorMayDiffer;
403 |     }
404 | 
405 |     bool is_changed = false;
406 |     switch (comparison) {
407 |       // If the subtrees are definitely identical, move to the end
408 |       // of the old subtree.
409 |       case IteratorMatches:
410 |         next_position = iterator_end_position(&old_iter);
411 |         break;
412 | 
413 |       // If the subtrees might differ internally, descend into both
414 |       // subtrees, finding the first child that spans the current position.
415 |       case IteratorMayDiffer:
416 |         if (iterator_descend(&old_iter, position.bytes)) {
417 |           if (!iterator_descend(&new_iter, position.bytes)) {  // only the old tree has a child here
418 |             is_changed = true;
419 |             next_position = iterator_end_position(&old_iter);
420 |           }
421 |         } else if (iterator_descend(&new_iter, position.bytes)) {  // only the new tree has a child here
422 |           is_changed = true;
423 |           next_position = iterator_end_position(&new_iter);
424 |         } else {  // neither could descend: skip to the nearer end without recording a change
425 |           next_position = length_min(
426 |             iterator_end_position(&old_iter),
427 |             iterator_end_position(&new_iter)
428 |           );
429 |         }
430 |         break;
431 | 
432 |       // If the subtrees are different, record a change and then move
433 |       // to whichever of the two subtree ends comes first.
434 |       case IteratorDiffers:
435 |         is_changed = true;
436 |         next_position = length_min(
437 |           iterator_end_position(&old_iter),
438 |           iterator_end_position(&new_iter)
439 |         );
440 |         break;
441 |     }
442 | 
443 |     // Advance both iterators past every span that ends at or before next_position.
444 |     while (
445 |       !iterator_done(&old_iter) &&
446 |       iterator_end_position(&old_iter).bytes <= next_position.bytes
447 |     ) iterator_advance(&old_iter);
448 |     while (
449 |       !iterator_done(&new_iter) &&
450 |       iterator_end_position(&new_iter).bytes <= next_position.bytes
451 |     ) iterator_advance(&new_iter);
452 | 
453 |     // Bring both iterators to the same visible depth by ascending the deeper one.
454 |     while (old_iter.visible_depth > new_iter.visible_depth) {
455 |       iterator_ascend(&old_iter);
456 |     }
457 |     while (new_iter.visible_depth > old_iter.visible_depth) {
458 |       iterator_ascend(&new_iter);
459 |     }
460 | 
461 |     if (is_changed) {
462 |       #ifdef DEBUG_GET_CHANGED_RANGES
463 |       printf(
464 |         "  change: [[%u, %u] - [%u, %u]]\n",
465 |         position.extent.row + 1, position.extent.column,
466 |         next_position.extent.row + 1, next_position.extent.column
467 |       );
468 |       #endif
469 | 
470 |       ts_range_array_add(&results, position, next_position);
471 |     }
472 | 
473 |     position = next_position;
474 | 
475 |     // Keep track of the current position in the included range differences
476 |     // array in order to avoid scanning the entire array on each iteration.
477 |     while (included_range_difference_index < included_range_differences->size) {
478 |       const TSRange *range = &included_range_differences->contents[
479 |         included_range_difference_index
480 |       ];
481 |       if (range->end_byte <= position.bytes) {
482 |         included_range_difference_index++;
483 |       } else {
484 |         break;
485 |       }
486 |     }
487 |   } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
488 |   // A difference in the total size of the two trees is recorded as a change as well.
489 |   Length old_size = ts_subtree_total_size(*old_tree);
490 |   Length new_size = ts_subtree_total_size(*new_tree);
491 |   if (old_size.bytes < new_size.bytes) {
492 |     ts_range_array_add(&results, old_size, new_size);
493 |   } else if (new_size.bytes < old_size.bytes) {
494 |     ts_range_array_add(&results, new_size, old_size);
495 |   }
496 |   // Write the iterators' cursor copies back to the caller's cursors and hand over the results.
497 |   *cursor1 = old_iter.cursor;
498 |   *cursor2 = new_iter.cursor;
499 |   *ranges = results.contents;
500 |   return results.size;
501 | }
502 | 


--------------------------------------------------------------------------------