├── .clang-format
├── .editorconfig
├── .github
├── dependabot.yml
└── workflows
│ ├── ci.yml
│ ├── docs.yml
│ └── pypi.yml
├── .gitignore
├── .gitmodules
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
├── _static
│ ├── favicon.png
│ └── logo.png
├── classes
│ ├── tree_sitter.Language.rst
│ ├── tree_sitter.LogType.rst
│ ├── tree_sitter.LookaheadIterator.rst
│ ├── tree_sitter.Node.rst
│ ├── tree_sitter.Parser.rst
│ ├── tree_sitter.Point.rst
│ ├── tree_sitter.Query.rst
│ ├── tree_sitter.QueryCursor.rst
│ ├── tree_sitter.QueryError.rst
│ ├── tree_sitter.QueryPredicate.rst
│ ├── tree_sitter.Range.rst
│ ├── tree_sitter.Tree.rst
│ └── tree_sitter.TreeCursor.rst
├── conf.py
└── index.rst
├── examples
├── usage.py
└── walk_tree.py
├── pyproject.toml
├── setup.py
├── tests
├── __init__.py
├── test_language.py
├── test_lookahead_iterator.py
├── test_node.py
├── test_parser.py
├── test_query.py
└── test_tree.py
└── tree_sitter
├── __init__.py
├── __init__.pyi
├── binding
├── language.c
├── lookahead_iterator.c
├── module.c
├── node.c
├── parser.c
├── query.c
├── query_cursor.c
├── query_predicates.c
├── range.c
├── tree.c
├── tree_cursor.c
└── types.h
└── py.typed
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: LLVM
2 | AlignEscapedNewlinesLeft: false
3 | DerivePointerAlignment: false
4 | PointerAlignment: Right
5 | IndentWidth: 4
6 | ColumnLimit: 100
7 | IncludeBlocks: Preserve
8 | StatementMacros:
9 | - PyObject_HEAD
10 | - Py_BEGIN_ALLOW_THREADS
11 | - Py_END_ALLOW_THREADS
12 | BinPackArguments: true
13 | IndentCaseLabels: true
14 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | insert_final_newline = true
7 | trim_trailing_whitespace = true
8 |
9 | [*.md]
10 | indent_size = 2
11 |
12 | [*.rst]
13 | indent_size = 3
14 |
15 | [*.{c,h,py,pyi}]
16 | indent_size = 4
17 | max_line_length = 100
18 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: github-actions
4 | directory: /
5 | schedule:
6 | interval: weekly
7 | day: saturday
8 | commit-message:
9 | prefix: ci
10 | groups:
11 | actions:
12 | patterns: ["*"]
13 | labels: [dependencies]
14 | open-pull-requests-limit: 1
15 | - package-ecosystem: gitsubmodule
16 | directory: /
17 | schedule:
18 | interval: weekly
19 | day: sunday
20 | commit-message:
21 | prefix: build
22 | labels: [dependencies]
23 | open-pull-requests-limit: 1
24 | - package-ecosystem: pip
25 | directory: /
26 | schedule:
27 | interval: weekly
28 | day: friday
29 | commit-message:
30 | prefix: build
31 | labels: [dependencies]
32 | open-pull-requests-limit: 1
33 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | paths:
7 | - tree_sitter/**
8 | - tests/**
9 | pull_request:
10 | paths:
11 | - tree_sitter/**
12 | - tests/**
13 | workflow_dispatch:
14 |
15 | concurrency:
16 | group: ${{github.workflow}}-${{github.ref}}
17 | cancel-in-progress: true
18 |
19 | jobs:
20 | build:
21 | strategy:
22 | fail-fast: false
23 | matrix:
24 | python: ["3.10", "3.11", "3.12", "3.13"]
25 | os: [ubuntu-latest, macos-latest, windows-latest]
26 | runs-on: ${{matrix.os}}
27 | steps:
28 | - name: Checkout repository
29 | uses: actions/checkout@v4
30 | with:
31 | submodules: true
32 | - name: Set up Python ${{matrix.python}}
33 | uses: actions/setup-python@v5
34 | with:
35 | python-version: ${{matrix.python}}
36 | - name: Lint
37 | continue-on-error: true
38 | run: pipx run ruff check . --output-format=github
39 | - name: Build
40 | run: pip install -v -e .[tests]
41 | env:
42 | CFLAGS: -Wextra -Og -g -fno-omit-frame-pointer
43 | - name: Test
44 | run: python -munittest -v
45 | # FIXME: remove when the tests stop crashing
46 | continue-on-error: ${{runner.os == 'Windows'}}
47 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
1 | name: Docs
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | paths:
7 | - pyproject.toml
8 | - tree_sitter/__init__.py
9 | - tree_sitter/binding/*
10 | - docs/**
11 |
12 | concurrency:
13 | group: ${{github.workflow}}-${{github.ref}}
14 | cancel-in-progress: true
15 |
16 | permissions:
17 | pages: write
18 | id-token: write
19 |
20 | jobs:
21 | docs:
22 | runs-on: ubuntu-latest
23 | environment:
24 | name: github-pages
25 | url: ${{steps.deploy.outputs.page_url}}
26 | steps:
27 | - name: Checkout repository
28 | uses: actions/checkout@v4
29 | with:
30 | submodules: true
31 | - name: Set up Python
32 | uses: actions/setup-python@v5
33 | with:
34 | python-version: "3.11"
35 | - name: Install
36 | run: pip install -e .[docs]
37 | env:
38 | CFLAGS: -O0
39 | - name: Build docs
40 | run: sphinx-build -M html docs docs/_build
41 | - name: Upload docs artifact
42 | uses: actions/upload-pages-artifact@v3
43 | with:
44 | path: docs/_build/html
45 | - name: Deploy to GitHub Pages
46 | id: deploy
47 | uses: actions/deploy-pages@v4
48 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
1 | name: Upload to PyPI
2 |
3 | on:
4 | push:
5 | tags: ["v*"]
6 |
7 | jobs:
8 | build-sdist:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Checkout repository
12 | uses: actions/checkout@v4
13 | with:
14 | submodules: true
15 | - name: Set up Python
16 | uses: actions/setup-python@v5
17 | with:
18 | python-version: "3.11"
19 | - name: Build sources
20 | run: |-
21 | pip install build
22 | python -mbuild -n -s
23 | - name: Upload sources
24 | uses: actions/upload-artifact@v4
25 | with:
26 | name: dist-sources
27 | path: dist/*.tar.gz
28 | retention-days: 2
29 |
30 | build-wheels:
31 | runs-on: ${{matrix.os}}
32 | strategy:
33 | matrix:
34 | include:
35 | - { os: windows-2022, cibw_arch: AMD64 }
36 | - { os: windows-2022, cibw_arch: ARM64 }
37 | - { os: ubuntu-24.04, cibw_arch: x86_64 }
38 | - { os: ubuntu-24.04-arm, cibw_arch: aarch64 }
39 | - { os: macos-14, cibw_arch: arm64 }
40 | - { os: macos-13, cibw_arch: x86_64 }
41 | steps:
42 | - name: Checkout repository
43 | uses: actions/checkout@v4
44 | with:
45 | submodules: true
46 | - name: Build wheels
47 | uses: pypa/cibuildwheel@v2.22
48 | env:
49 | CIBW_ARCHS: ${{matrix.cibw_arch}}
50 | CIBW_SKIP: "pp* *-musllinux_aarch64"
51 | # FIXME: only skip win_arm64 when the tests stop crashing
52 | CIBW_TEST_SKIP: "*-win_*"
53 | - name: Upload wheels
54 | uses: actions/upload-artifact@v4
55 | with:
56 | name: dist-wheels-${{matrix.os}}-${{matrix.cibw_arch}}
57 | path: wheelhouse/*.whl
58 | retention-days: 2
59 |
60 | release:
61 | runs-on: ubuntu-latest
62 | needs: [build-sdist, build-wheels]
63 | steps:
64 | - name: Download artifacts
65 | uses: actions/download-artifact@v4
66 | with:
67 | path: dist
68 | pattern: dist-*
69 | merge-multiple: true
70 | - name: Check artifacts
71 | run: ls -l dist
72 | - name: Upload to pypi
73 | uses: pypa/gh-action-pypi-publish@release/v1
74 | with:
75 | password: ${{secrets.PYPI_API_TOKEN}}
76 | - name: Create GitHub release
77 | run: gh release create "$GITHUB_REF_NAME" --generate-notes
78 | env:
79 | GH_TOKEN: ${{github.token}}
80 | GH_REPO: ${{github.repository}}
81 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .tox
2 | .venv
3 | build
4 | dist
5 | *.py[cd]
6 | *.egg-info
7 | *.so
8 | __pycache__
9 | wheelhouse
10 | docs/_build
11 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tree-sitter"]
2 | url = https://github.com/tree-sitter/tree-sitter
3 | path = tree_sitter/core
4 | branch = release-0.25
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2019 Max Brunsfeld, GitHub
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include tree_sitter/binding/types.h
2 |
3 | prune tree_sitter/core
4 | graft tree_sitter/core/lib/src
5 | graft tree_sitter/core/lib/include/tree_sitter
6 | prune tree_sitter/core/lib/src/wasm
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python Tree-sitter
2 |
3 | [![CI][ci]](https://github.com/tree-sitter/py-tree-sitter/actions/workflows/ci.yml)
4 | [![pypi][pypi]](https://pypi.org/project/tree-sitter/)
5 | [![docs][docs]](https://tree-sitter.github.io/py-tree-sitter/)
6 |
7 | This module provides Python bindings to the [tree-sitter] parsing library.
8 |
9 | ## Installation
10 |
11 | The package has no library dependencies and provides pre-compiled wheels for all major platforms.
12 |
13 | > [!NOTE]
14 | > If your platform is not currently supported, please submit an [issue] on GitHub.
15 |
16 | ```sh
17 | pip install tree-sitter
18 | ```
19 |
20 | ## Usage
21 |
22 | ### Setup
23 |
24 | #### Install languages
25 |
26 | Tree-sitter language implementations also provide pre-compiled binary wheels.
27 | Let's take [Python][tree-sitter-python] as an example.
28 |
29 | ```sh
30 | pip install tree-sitter-python
31 | ```
32 |
33 | Then, you can load it as a `Language` object:
34 |
35 | ```python
36 | import tree_sitter_python as tspython
37 | from tree_sitter import Language, Parser
38 |
39 | PY_LANGUAGE = Language(tspython.language())
40 | ```
41 |
42 | ### Basic parsing
43 |
44 | Create a `Parser` and configure it to use a language:
45 |
46 | ```python
47 | parser = Parser(PY_LANGUAGE)
48 | ```
49 |
50 | Parse some source code:
51 |
52 | ```python
53 | tree = parser.parse(
54 | bytes(
55 | """
56 | def foo():
57 | if bar:
58 | baz()
59 | """,
60 | "utf8"
61 | )
62 | )
63 | ```
64 |
65 | If you have your source code in some data structure other than a bytes object,
66 | you can pass a "read" callable to the parse function.
67 |
68 | The read callable can use either the byte offset or the point tuple to read from
69 | a buffer and return source code as a bytes object. An empty bytes object or `None`
70 | terminates parsing for that line. The bytes must be encoded as UTF-8 or UTF-16.
71 |
72 | For example, to use the byte offset with UTF-8 encoding:
73 |
74 | ```python
75 | src = bytes(
76 | """
77 | def foo():
78 | if bar:
79 | baz()
80 | """,
81 | "utf8",
82 | )
83 |
84 |
85 | def read_callable_byte_offset(byte_offset, point):
86 | return src[byte_offset : byte_offset + 1]
87 |
88 |
89 | tree = parser.parse(read_callable_byte_offset, encoding="utf8")
90 | ```
91 |
92 | And to use the point:
93 |
94 | ```python
95 | src_lines = ["\n", "def foo():\n", " if bar:\n", " baz()\n"]
96 |
97 |
98 | def read_callable_point(byte_offset, point):
99 | row, column = point
100 | if row >= len(src_lines) or column >= len(src_lines[row]):
101 | return None
102 | return src_lines[row][column:].encode("utf8")
103 |
104 |
105 | tree = parser.parse(read_callable_point, encoding="utf8")
106 | ```
107 |
108 | Inspect the resulting `Tree`:
109 |
110 | ```python
111 | root_node = tree.root_node
112 | assert root_node.type == 'module'
113 | assert root_node.start_point == (1, 0)
114 | assert root_node.end_point == (4, 0)
115 |
116 | function_node = root_node.children[0]
117 | assert function_node.type == 'function_definition'
118 | assert function_node.child_by_field_name('name').type == 'identifier'
119 |
120 | function_name_node = function_node.children[1]
121 | assert function_name_node.type == 'identifier'
122 | assert function_name_node.start_point == (1, 4)
123 | assert function_name_node.end_point == (1, 7)
124 |
125 | function_body_node = function_node.child_by_field_name("body")
126 |
127 | if_statement_node = function_body_node.child(0)
128 | assert if_statement_node.type == "if_statement"
129 |
130 | function_call_node = if_statement_node.child_by_field_name("consequence").child(0).child(0)
131 | assert function_call_node.type == "call"
132 |
133 | function_call_name_node = function_call_node.child_by_field_name("function")
134 | assert function_call_name_node.type == "identifier"
135 |
136 | function_call_args_node = function_call_node.child_by_field_name("arguments")
137 | assert function_call_args_node.type == "argument_list"
138 |
139 |
140 | assert str(root_node) == (
141 | "(module "
142 | "(function_definition "
143 | "name: (identifier) "
144 | "parameters: (parameters) "
145 | "body: (block "
146 | "(if_statement "
147 | "condition: (identifier) "
148 | "consequence: (block "
149 | "(expression_statement (call "
150 | "function: (identifier) "
151 | "arguments: (argument_list))))))))"
152 | )
153 | ```
154 |
155 | Or, to use the byte offset with UTF-16 encoding:
156 |
157 | ```python
158 | parser.language = JAVASCRIPT
159 | source_code = bytes("'😎' && '🐍'", "utf16")
160 |
161 | def read(byte_position, _):
162 | return source_code[byte_position: byte_position + 2]
163 |
164 | tree = parser.parse(read, encoding="utf16")
165 | root_node = tree.root_node
166 | statement_node = root_node.children[0]
167 | binary_node = statement_node.children[0]
168 | snake_node = binary_node.children[2]
169 | snake = source_code[snake_node.start_byte:snake_node.end_byte]
170 |
171 | assert binary_node.type == "binary_expression"
172 | assert snake_node.type == "string"
173 | assert snake.decode("utf16") == "'🐍'"
174 | ```
175 |
176 | ### Walking syntax trees
177 |
178 | If you need to traverse a large number of nodes efficiently, you can use
179 | a `TreeCursor`:
180 |
181 | ```python
182 | cursor = tree.walk()
183 |
184 | assert cursor.node.type == "module"
185 |
186 | assert cursor.goto_first_child()
187 | assert cursor.node.type == "function_definition"
188 |
189 | assert cursor.goto_first_child()
190 | assert cursor.node.type == "def"
191 |
192 | # Returns `False` because the `def` node has no children
193 | assert not cursor.goto_first_child()
194 |
195 | assert cursor.goto_next_sibling()
196 | assert cursor.node.type == "identifier"
197 |
198 | assert cursor.goto_next_sibling()
199 | assert cursor.node.type == "parameters"
200 |
201 | assert cursor.goto_parent()
202 | assert cursor.node.type == "function_definition"
203 | ```
204 |
205 | > [!IMPORTANT]
206 | > Keep in mind that the cursor can only walk into children of the node that it started from.
207 |
208 | See [examples/walk_tree.py] for a complete example of iterating over every node in a tree.
209 |
210 | ### Editing
211 |
212 | When a source file is edited, you can edit the syntax tree to keep it in sync with
213 | the source:
214 |
215 | ```python
216 | new_src = src[:5] + src[5 : 5 + 2].upper() + src[5 + 2 :]
217 |
218 | tree.edit(
219 | start_byte=5,
220 | old_end_byte=5 + 2,
221 | new_end_byte=5 + 2,
222 | start_point=(1, 4),
223 | old_end_point=(1, 4 + 2),
224 | new_end_point=(1, 4 + 2),
225 | )
226 | ```
227 |
228 | Then, when you're ready to incorporate the changes into a new syntax tree,
229 | you can call `Parser.parse` again, but pass in the old tree:
230 |
231 | ```python
232 | new_tree = parser.parse(new_src, tree)
233 | ```
234 |
235 | This will run much faster than if you were parsing from scratch.
236 |
237 | The `Tree.changed_ranges` method can be called on the _old_ tree to return
238 | the list of ranges whose syntactic structure has been changed:
239 |
240 | ```python
241 | for changed_range in tree.changed_ranges(new_tree):
242 | print("Changed range:")
243 | print(f" Start point {changed_range.start_point}")
244 | print(f" Start byte {changed_range.start_byte}")
245 | print(f" End point {changed_range.end_point}")
246 | print(f" End byte {changed_range.end_byte}")
247 | ```
248 |
249 | ### Pattern-matching
250 |
251 | You can search for patterns in a syntax tree using a [tree query]:
252 |
253 | ```python
254 | query = PY_LANGUAGE.query(
255 | """
256 | (function_definition
257 | name: (identifier) @function.def
258 | body: (block) @function.block)
259 |
260 | (call
261 | function: (identifier) @function.call
262 | arguments: (argument_list) @function.args)
263 | """
264 | )
265 | ```
266 |
267 | #### Captures
268 |
269 | ```python
270 | captures = QueryCursor(query).captures(tree.root_node)
271 | assert len(captures) == 4
272 | assert captures["function.def"][0] == function_name_node
273 | assert captures["function.block"][0] == function_body_node
274 | assert captures["function.call"][0] == function_call_name_node
275 | assert captures["function.args"][0] == function_call_args_node
276 | ```
277 |
278 | #### Matches
279 |
280 | ```python
281 | matches = QueryCursor(query).matches(tree.root_node)
282 | assert len(matches) == 2
283 |
284 | # first match
285 | assert matches[0][1]["function.def"] == [function_name_node]
286 | assert matches[0][1]["function.block"] == [function_body_node]
287 |
288 | # second match
289 | assert matches[1][1]["function.call"] == [function_call_name_node]
290 | assert matches[1][1]["function.args"] == [function_call_args_node]
291 | ```
292 |
293 | The difference between the two methods is that `QueryCursor.matches()` groups captures into
294 | matches, which is much more useful when the captures within a query relate to each other.
295 |
296 | To try out and explore the code referenced in this README, check out [examples/usage.py].
297 |
298 | [tree-sitter]: https://tree-sitter.github.io/tree-sitter/
299 | [issue]: https://github.com/tree-sitter/py-tree-sitter/issues/new
300 | [tree-sitter-python]: https://github.com/tree-sitter/tree-sitter-python
301 | [tree query]: https://tree-sitter.github.io/tree-sitter/using-parsers/queries
302 | [ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/py-tree-sitter/ci.yml?logo=github&label=CI
303 | [pypi]: https://img.shields.io/pypi/v/tree-sitter?logo=pypi&logoColor=ffd242&label=PyPI
304 | [docs]: https://img.shields.io/github/deployments/tree-sitter/py-tree-sitter/github-pages?logo=sphinx&label=Docs
305 | [examples/walk_tree.py]: https://github.com/tree-sitter/py-tree-sitter/blob/master/examples/walk_tree.py
306 | [examples/usage.py]: https://github.com/tree-sitter/py-tree-sitter/blob/master/examples/usage.py
307 |
--------------------------------------------------------------------------------
/docs/_static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tree-sitter/py-tree-sitter/52c190d29c67ab84bf71b3e1e873138cc2146f8a/docs/_static/favicon.png
--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tree-sitter/py-tree-sitter/52c190d29c67ab84bf71b3e1e873138cc2146f8a/docs/_static/logo.png
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Language.rst:
--------------------------------------------------------------------------------
1 | Language
2 | ========
3 |
4 | .. autoclass:: tree_sitter.Language
5 |
6 | Methods
7 | -------
8 |
9 | .. automethod:: copy
10 | .. automethod:: field_id_for_name
11 | .. automethod:: field_name_for_id
12 | .. automethod:: id_for_node_kind
13 | .. automethod:: lookahead_iterator
14 | .. automethod:: next_state
15 | .. automethod:: node_kind_for_id
16 | .. automethod:: node_kind_is_named
17 | .. automethod:: node_kind_is_supertype
18 | .. automethod:: node_kind_is_visible
19 | .. automethod:: query
20 |
21 | .. deprecated:: 0.25.0
22 | Use the :class:`Query` constructor instead.
23 | .. automethod:: subtypes
24 |
25 | .. versionadded:: 0.25.0
26 |
27 | Special Methods
28 | ---------------
29 |
30 | .. automethod:: __copy__
31 | .. automethod:: __eq__
32 | .. automethod:: __hash__
33 |
34 | .. important::
35 |
36 | On 32-bit platforms, you must use ``hash(self) & 0xFFFFFFFF`` to get the actual hash.
37 | .. automethod:: __ne__
38 | .. automethod:: __repr__
39 |
40 | Attributes
41 | ----------
42 |
43 | .. autoattribute:: abi_version
44 |
45 | .. versionadded:: 0.25.0
46 | .. autoattribute:: field_count
47 | .. autoattribute:: name
48 |
49 | .. versionadded:: 0.25.0
50 | .. autoattribute:: node_kind_count
51 | .. autoattribute:: parse_state_count
52 | .. autoattribute:: semantic_version
53 |
54 | .. versionadded:: 0.25.0
55 | .. autoattribute:: supertypes
56 |
57 | .. versionadded:: 0.25.0
58 | .. autoattribute:: version
59 |
60 | .. deprecated:: 0.25.0
61 | Use :attr:`abi_version` instead.
62 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.LogType.rst:
--------------------------------------------------------------------------------
1 | LogType
2 | =======
3 |
4 | .. autoclass:: tree_sitter.LogType
5 | :show-inheritance:
6 |
7 | Members
8 | -------
9 |
10 | .. autoattribute:: PARSE
11 | .. autoattribute:: LEX
12 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.LookaheadIterator.rst:
--------------------------------------------------------------------------------
1 | LookaheadIterator
2 | =================
3 |
4 | .. autoclass:: tree_sitter.LookaheadIterator
5 | :show-inheritance:
6 |
7 | Methods
8 | -------
9 |
10 | .. automethod:: names
11 |
12 | .. versionadded:: 0.25.0
13 | Replaces the ``iter_names`` method
14 | .. automethod:: reset
15 |
16 | .. versionadded:: 0.25.0
17 | Replaces the ``reset_state`` method
18 | .. automethod:: symbols
19 |
20 | .. versionadded:: 0.25.0
21 |
22 | Special Methods
23 | ---------------
24 |
25 | .. automethod:: __iter__
26 |
27 | .. versionchanged:: 0.25.0
28 | Iterates over ``tuple[int, str]``
29 | .. automethod:: __next__
30 |
31 | .. versionchanged:: 0.25.0
32 | Yields ``tuple[int, str]``
33 |
34 | Attributes
35 | ----------
36 |
37 | .. autoattribute:: current_symbol
38 | .. autoattribute:: current_symbol_name
39 | .. autoattribute:: language
40 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Node.rst:
--------------------------------------------------------------------------------
1 | Node
2 | ====
3 |
4 | .. autoclass:: tree_sitter.Node
5 |
6 | Methods
7 | -------
8 |
9 | .. automethod:: child
10 | .. automethod:: child_by_field_id
11 | .. automethod:: child_by_field_name
12 | .. automethod:: child_with_descendant
13 | .. automethod:: children_by_field_id
14 | .. automethod:: children_by_field_name
15 | .. automethod:: descendant_for_byte_range
16 | .. automethod:: descendant_for_point_range
17 | .. automethod:: edit
18 | .. automethod:: field_name_for_child
19 | .. automethod:: field_name_for_named_child
20 | .. automethod:: first_child_for_byte
21 |
22 | .. versionadded:: 0.25.0
23 | .. automethod:: first_named_child_for_byte
24 |
25 | .. versionadded:: 0.25.0
26 | .. automethod:: named_child
27 | .. automethod:: named_descendant_for_byte_range
28 | .. automethod:: named_descendant_for_point_range
29 | .. automethod:: walk
30 |
31 | Special Methods
32 | ---------------
33 |
34 | .. automethod:: __eq__
35 | .. automethod:: __hash__
36 | .. automethod:: __ne__
37 | .. automethod:: __repr__
38 | .. automethod:: __str__
39 |
40 | Attributes
41 | ----------
42 |
43 | .. autoattribute:: byte_range
44 | .. autoattribute:: child_count
45 | .. autoattribute:: children
46 | .. autoattribute:: descendant_count
47 | .. autoattribute:: end_byte
48 | .. autoattribute:: end_point
49 | .. autoattribute:: grammar_id
50 | .. autoattribute:: grammar_name
51 | .. autoattribute:: has_changes
52 | .. autoattribute:: has_error
53 | .. autoattribute:: id
54 | .. autoattribute:: is_error
55 | .. autoattribute:: is_extra
56 | .. autoattribute:: is_missing
57 | .. autoattribute:: is_named
58 | .. autoattribute:: kind_id
59 | .. autoattribute:: named_child_count
60 | .. autoattribute:: named_children
61 | .. autoattribute:: next_named_sibling
62 | .. autoattribute:: next_parse_state
63 | .. autoattribute:: next_sibling
64 | .. autoattribute:: parent
65 | .. autoattribute:: parse_state
66 | .. autoattribute:: prev_named_sibling
67 | .. autoattribute:: prev_sibling
68 | .. autoattribute:: range
69 | .. autoattribute:: start_byte
70 | .. autoattribute:: start_point
71 | .. autoattribute:: text
72 | .. autoattribute:: type
73 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Parser.rst:
--------------------------------------------------------------------------------
1 | Parser
2 | ======
3 |
4 | .. autoclass:: tree_sitter.Parser
5 |
6 | Methods
7 | -------
8 |
9 | .. automethod:: parse
10 |
11 | .. versionchanged:: 0.25.0
12 | * ``encoding`` can be one of ``"utf8", "utf16", "utf16le", "utf16be"``.
13 | * ``progress_callback`` parameter added.
14 | .. automethod:: print_dot_graphs
15 | .. automethod:: reset
16 |
17 | Attributes
18 | ----------
19 |
20 | .. autoattribute:: included_ranges
21 | .. autoattribute:: language
22 | .. autoattribute:: logger
23 | .. autoattribute:: timeout_micros
24 |
25 | .. deprecated:: 0.25.0
26 | Use the ``progress_callback`` in :meth:`parse`.
27 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Point.rst:
--------------------------------------------------------------------------------
1 | Point
2 | =====
3 |
4 | .. autoclass:: tree_sitter.Point
5 | :show-inheritance:
6 |
7 | Attributes
8 | ----------
9 |
10 | .. autoattribute:: column
11 | .. autoattribute:: row
12 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Query.rst:
--------------------------------------------------------------------------------
1 | Query
2 | =====
3 |
4 | .. autoclass:: tree_sitter.Query
5 |
6 | .. seealso:: `Query Syntax`_
7 |
8 | .. _Query Syntax: https://tree-sitter.github.io/tree-sitter/using-parsers/queries/1-syntax.html
9 |
10 | .. note::
11 |
12 | The following predicates are supported by default:
13 |
14 | * ``#eq?``, ``#not-eq?``, ``#any-eq?``, ``#any-not-eq?``
15 | * ``#match?``, ``#not-match?``, ``#any-match?``, ``#any-not-match?``
16 | * ``#any-of?``, ``#not-any-of?``
17 | * ``#is?``, ``#is-not?``
18 | * ``#set!``
19 |
20 | Methods
21 | -------
22 |
23 | .. automethod:: capture_name
24 |
25 | .. versionadded:: 0.25.0
26 | .. automethod:: capture_quantifier
27 |
28 | .. versionadded:: 0.25.0
29 | .. automethod:: disable_capture
30 | .. automethod:: disable_pattern
31 | .. automethod:: end_byte_for_pattern
32 | .. automethod:: is_pattern_guaranteed_at_step
33 | .. automethod:: is_pattern_non_local
34 | .. automethod:: is_pattern_rooted
35 | .. automethod:: pattern_assertions
36 | .. automethod:: pattern_settings
37 | .. automethod:: start_byte_for_pattern
38 | .. automethod:: string_value
39 |
40 | .. versionadded:: 0.25.0
41 |
42 | Attributes
43 | ----------
44 |
45 | .. autoattribute:: capture_count
46 | .. autoattribute:: pattern_count
47 | .. autoattribute:: string_count
48 |
49 | .. versionadded:: 0.25.0
50 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.QueryCursor.rst:
--------------------------------------------------------------------------------
1 | QueryCursor
2 | ===========
3 |
4 | .. autoclass:: tree_sitter.QueryCursor
5 |
6 | .. versionadded:: 0.25.0
7 |
8 | Methods
9 | -------
10 |
11 | .. automethod:: captures
12 | .. automethod:: matches
13 | .. automethod:: set_byte_range
14 | .. automethod:: set_max_start_depth
15 | .. automethod:: set_point_range
16 |
17 | Attributes
18 | ----------
19 |
20 | .. autoattribute:: did_exceed_match_limit
21 | .. autoattribute:: match_limit
22 | .. autoattribute:: timeout_micros
23 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.QueryError.rst:
--------------------------------------------------------------------------------
1 | QueryError
2 | ==========
3 |
4 | .. autoclass:: tree_sitter.QueryError
5 | :show-inheritance:
6 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.QueryPredicate.rst:
--------------------------------------------------------------------------------
1 | QueryPredicate
2 | ==============
3 |
4 | .. autoclass:: tree_sitter.QueryPredicate
5 | :show-inheritance:
6 |
7 | Special Methods
8 | ---------------
9 |
10 | .. automethod:: __call__
11 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Range.rst:
--------------------------------------------------------------------------------
1 | Range
2 | =====
3 |
4 | .. autoclass:: tree_sitter.Range
5 |
6 | Special Methods
7 | ---------------
8 |
9 | .. automethod:: __eq__
10 | .. automethod:: __ne__
11 | .. automethod:: __repr__
12 | .. automethod:: __hash__
13 |
14 | Attributes
15 | ----------
16 |
17 | .. autoattribute:: end_byte
18 | .. autoattribute:: end_point
19 | .. autoattribute:: start_byte
20 | .. autoattribute:: start_point
21 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.Tree.rst:
--------------------------------------------------------------------------------
1 | Tree
2 | ====
3 |
4 | .. autoclass:: tree_sitter.Tree
5 |
6 | Methods
7 | -------
8 |
9 | .. automethod:: changed_ranges
10 | .. automethod:: copy
11 | .. automethod:: edit
12 | .. automethod:: print_dot_graph
13 | .. automethod:: root_node_with_offset
14 | .. automethod:: walk
15 |
16 | Special Methods
17 | ---------------
18 |
19 | .. automethod:: __copy__
20 |
21 | Attributes
22 | ----------
23 |
24 | .. autoattribute:: included_ranges
25 | .. autoattribute:: language
26 | .. autoattribute:: root_node
27 |
--------------------------------------------------------------------------------
/docs/classes/tree_sitter.TreeCursor.rst:
--------------------------------------------------------------------------------
1 | TreeCursor
2 | ==========
3 |
4 | .. autoclass:: tree_sitter.TreeCursor
5 |
6 | Methods
7 | -------
8 |
9 | .. automethod:: copy
10 | .. automethod:: goto_descendant
11 | .. automethod:: goto_first_child
12 | .. automethod:: goto_first_child_for_byte
13 | .. automethod:: goto_first_child_for_point
14 | .. automethod:: goto_last_child
15 | .. automethod:: goto_next_sibling
16 | .. automethod:: goto_parent
17 | .. automethod:: goto_previous_sibling
18 | .. automethod:: reset
19 | .. automethod:: reset_to
20 |
21 | Special Methods
22 | ---------------
23 |
24 | .. automethod:: __copy__
25 |
26 | Attributes
27 | ----------
28 |
29 | .. autoattribute:: depth
30 | .. autoattribute:: descendant_index
31 | .. autoattribute:: field_id
32 | .. autoattribute:: field_name
33 | .. autoattribute:: node
34 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import version as v
2 | from pathlib import PurePath
3 | from re import compile as regex
4 | from sys import path
5 |
# Prepend a "tree_sitter" directory to the import path, presumably so autodoc can import
# the package being documented.
# NOTE(review): with this file at docs/conf.py, parents[2] is the directory *above* the
# repository root, not the root itself — confirm the intended path.
path.insert(0, str(PurePath(__file__).parents[2] / "tree_sitter"))

# Project metadata.
project = "py-tree-sitter"
author = "Max Brunsfeld"
copyright = "2019, MIT license"
# The release string is read from the installed distribution's metadata.
release = v("tree_sitter")

# Sphinx extensions enabled for this build.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx.ext.githubpages",
]
source_suffix = {
    ".rst": "restructuredtext"
}
master_doc = "index"
language = "en"
needs_sphinx = "8.1"
templates_path = ["_templates"]

# External inventories used to resolve cross-references.
intersphinx_mapping = {
    "python": ("https://docs.python.org/3.10/", None),
}

# autodoc / autosummary behaviour.
autoclass_content = "class"
autodoc_member_order = "alphabetical"
autosummary_generate = False

# Docstrings are parsed as NumPy style only.
napoleon_numpy_docstring = True
napoleon_google_docstring = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = False
napoleon_use_admonition_for_notes = True

# HTML output.
html_theme = "sphinx_book_theme"
html_theme_options = {
    "repository_url": "https://github.com/tree-sitter/py-tree-sitter",
    "pygments_light_style": "default",
    "pygments_dark_style": "github-dark",
    "navigation_with_keys": False,
    "use_repository_button": True,
    "use_download_button": False,
    "use_fullscreen_button": False,
    "show_toc_level": 2,
}
html_static_path = ["_static"]
html_logo = "_static/logo.png"
html_favicon = "_static/favicon.png"


# Matches a run of non-space characters ending in "self", followed by everything up to
# (but not including) the next period. Used by process_docstring below.
special_doc = regex(r"\S*self[^.]+")
60 |
61 |
def process_signature(_app, _what, name, _obj, _options, _signature, return_annotation):
    """Override the signature autodoc renders for selected classes.

    Handler for the ``autodoc-process-signature`` event. For the class names
    listed below it returns ``(signature, return_annotation)``, where a
    signature of ``None`` is returned for ``QueryPredicate`` and ``LogType``.
    For any other name it returns ``None``.
    """
    overrides = {
        "tree_sitter.Language": "(ptr)",
        "tree_sitter.Query": "(language, source)",
        "tree_sitter.QueryCursor": "(query, *, match_limit=None, timeout_micros=None)",
        "tree_sitter.Parser": "(language, *, included_ranges=None, timeout_micros=None)",
        "tree_sitter.Range": "(start_point, end_point, start_byte, end_byte)",
        "tree_sitter.QueryPredicate": None,
        "tree_sitter.LogType": None,
    }
    if name not in overrides:
        return None
    return overrides[name], return_annotation
77 |
78 |
def process_docstring(_app, what, name, _obj, _options, lines):
    """Rewrite docstrings in place before autodoc renders them.

    Handler for the ``autodoc-process-docstring`` event. ``lines`` is the
    mutable list of docstring lines and is modified in place:

    * ``data`` entries have their docstring removed entirely;
    * ``__index__`` methods get a fixed summary line;
    * other dunder methods whose first line mentions ``self`` have that line
      replaced with ``Implements ``<expr>``.``, where ``<expr>`` is the part of
      the line matched by ``special_doc``.
    """
    if what == "data":
        lines.clear()
        return
    if what != "method":
        return

    if name.endswith("__index__"):
        summary = "Converts ``self`` to an integer for use as an index."
        # The docstring may be empty, in which case there is no first line to overwrite.
        if lines:
            lines[0] = summary
        else:
            lines.append(summary)
    elif name.endswith("__") and lines and "self" in lines[0]:
        found = special_doc.search(lines[0])
        # "self" alone (e.g. at the end of the line) does not satisfy the pattern;
        # leave the line untouched rather than failing on a missing match.
        if found is not None:
            lines[0] = f"Implements ``{found.group(0)}``."
87 |
88 |
def process_bases(_app, name, _obj, _options, bases):
    """Replace the last listed base class for selected classes.

    Handler for the ``autodoc-process-bases`` event. ``bases`` is modified in
    place; classes not listed here are left untouched.
    """
    displayed_base = {
        "tree_sitter.Point": ":class:`~typing.NamedTuple`",
        "tree_sitter.LogType": ":class:`~enum.IntEnum`",
        "tree_sitter.LookaheadIterator": ":class:`~collections.abc.Iterator`",
    }
    if name in displayed_base:
        bases[-1] = displayed_base[name]
96 |
97 |
def setup(app):
    """Sphinx extension entry point: register the autodoc event handlers."""
    hooks = (
        ("autodoc-process-signature", process_signature),
        ("autodoc-process-docstring", process_docstring),
        ("autodoc-process-bases", process_bases),
    )
    for event_name, handler in hooks:
        app.connect(event_name, handler)
102 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | py-tree-sitter
2 | ==============
3 |
4 | Python bindings to the Tree-sitter parsing library.
5 |
6 | Constants
7 | ---------
8 |
9 | .. autodata:: tree_sitter.LANGUAGE_VERSION
10 |
11 | The latest ABI version that is supported by the current version of the library.
12 |
13 | .. note::
14 |
15 | When a :class:`Language` is generated by the Tree-sitter CLI, it is assigned
16 | an ABI version number that corresponds to the current CLI version.
17 | The Tree-sitter library is generally backwards-compatible with languages
18 | generated using older CLI versions, but is not forwards-compatible.
19 |
20 | .. autodata:: tree_sitter.MIN_COMPATIBLE_LANGUAGE_VERSION
21 |
22 | The earliest ABI version that is supported by the current version of the library.
23 |
24 |
25 | Classes
26 | -------
27 |
28 | .. autosummary::
29 | :toctree: classes
30 | :nosignatures:
31 |
32 | tree_sitter.Language
33 | tree_sitter.LogType
34 | tree_sitter.LookaheadIterator
35 | tree_sitter.Node
36 | tree_sitter.Parser
37 | tree_sitter.Point
38 | tree_sitter.Query
39 | tree_sitter.QueryError
40 | tree_sitter.QueryPredicate
41 | tree_sitter.Range
42 | tree_sitter.Tree
43 | tree_sitter.TreeCursor
44 |
--------------------------------------------------------------------------------
/examples/usage.py:
--------------------------------------------------------------------------------
1 | from tree_sitter import Language, Parser
2 | import tree_sitter_python
3 |
4 | PY_LANGUAGE = Language(tree_sitter_python.language())
5 |
6 | parser = Parser(PY_LANGUAGE)
7 |
8 | # parsing a string of code
9 | tree = parser.parse(
10 | bytes(
11 | """
12 | def foo():
13 | if bar:
14 | baz()
15 | """,
16 | "utf8",
17 | )
18 | )
19 |
20 | # parsing a callable by using the byte offset
21 | src = bytes(
22 | """
23 | def foo():
24 | if bar:
25 | baz()
26 | """,
27 | "utf8",
28 | )
29 |
30 |
def read_callable_byte_offset(byte_offset, _):
    """Read callback that serves ``src`` one byte at a time, addressed by byte offset."""
    next_offset = byte_offset + 1
    return src[byte_offset:next_offset]
33 |
34 |
35 | tree = parser.parse(read_callable_byte_offset)
36 |
37 |
38 | # parsing a callable by using the point
39 | src_lines = ["\n", "def foo():\n", " if bar:\n", " baz()\n"]
40 |
41 |
def read_callable_point(_, point):
    """Read callback that serves ``src_lines`` addressed by ``(row, column)``.

    Returns the rest of the addressed line encoded as UTF-8, or ``None`` when
    the row or the column lies past the available text.
    """
    row, column = point
    if row < len(src_lines):
        line = src_lines[row]
        if column < len(line):
            return line[column:].encode("utf8")
    return None
47 |
48 |
49 | tree = parser.parse(read_callable_point)
50 |
51 | # inspecting nodes in the tree
52 | root_node = tree.root_node
53 | assert root_node.type == "module"
54 | assert root_node.start_point == (1, 0)
55 | assert root_node.end_point == (4, 0)
56 |
57 | function_node = root_node.child(0)
58 | assert function_node.type == "function_definition"
59 | assert function_node.child_by_field_name("name").type == "identifier"
60 |
61 | function_name_node = function_node.child(1)
62 | assert function_name_node.type == "identifier"
63 | assert function_name_node.start_point == (1, 4)
64 | assert function_name_node.end_point == (1, 7)
65 |
66 | function_body_node = function_node.child_by_field_name("body")
67 |
68 | if_statement_node = function_body_node.child(0)
69 | assert if_statement_node.type == "if_statement"
70 |
71 | function_call_node = if_statement_node.child_by_field_name("consequence").child(0).child(0)
72 | assert function_call_node.type == "call"
73 |
74 | function_call_name_node = function_call_node.child_by_field_name("function")
75 | assert function_call_name_node.type == "identifier"
76 |
77 | function_call_args_node = function_call_node.child_by_field_name("arguments")
78 | assert function_call_args_node.type == "argument_list"
79 |
80 |
81 | # getting the sexp representation of the tree
82 | assert str(root_node) == (
83 | "(module "
84 | "(function_definition "
85 | "name: (identifier) "
86 | "parameters: (parameters) "
87 | "body: (block "
88 | "(if_statement "
89 | "condition: (identifier) "
90 | "consequence: (block "
91 | "(expression_statement (call "
92 | "function: (identifier) "
93 | "arguments: (argument_list))))))))"
94 | )
95 |
96 | # walking the tree
97 | cursor = tree.walk()
98 |
99 | assert cursor.node.type == "module"
100 |
101 | assert cursor.goto_first_child()
102 | assert cursor.node.type == "function_definition"
103 |
104 | assert cursor.goto_first_child()
105 | assert cursor.node.type == "def"
106 |
107 | # Returns `False` because the `def` node has no children
108 | assert not cursor.goto_first_child()
109 |
110 | assert cursor.goto_next_sibling()
111 | assert cursor.node.type == "identifier"
112 |
113 | assert cursor.goto_next_sibling()
114 | assert cursor.node.type == "parameters"
115 |
116 | assert cursor.goto_parent()
117 | assert cursor.node.type == "function_definition"
118 |
# editing the tree
# Upper-case two bytes of the function name in place; the text keeps its length.
new_src = src[:5] + src[5 : 5 + 2].upper() + src[5 + 2 :]

# Describe that exact change to the old tree: bytes 5..7 are replaced by two new
# bytes, so the old and new end positions coincide. `src` starts with a newline,
# which puts byte 5 on row 1, column 4.
tree.edit(
    start_byte=5,
    old_end_byte=5 + 2,
    new_end_byte=5 + 2,
    start_point=(1, 4),
    old_end_point=(1, 4 + 2),
    new_end_point=(1, 4 + 2),
)

# Re-parse, letting the parser reuse the parts of the edited old tree.
new_tree = parser.parse(new_src, tree)
132 |
133 | # inspecting the changes
134 | for changed_range in tree.changed_ranges(new_tree):
135 | print("Changed range:")
136 | print(f" Start point {changed_range.start_point}")
137 | print(f" Start byte {changed_range.start_byte}")
138 | print(f" End point {changed_range.end_point}")
139 | print(f" End byte {changed_range.end_byte}")
140 |
141 |
142 | # querying the tree
143 | query = PY_LANGUAGE.query(
144 | """
145 | (function_definition
146 | name: (identifier) @function.def
147 | body: (block) @function.block)
148 |
149 | (call
150 | function: (identifier) @function.call
151 | arguments: (argument_list) @function.args)
152 | """
153 | )
154 |
155 | # ...with captures
156 | captures = query.captures(tree.root_node)
157 | assert len(captures) == 4
158 | assert captures["function.def"][0] == function_name_node
159 | assert captures["function.block"][0] == function_body_node
160 | assert captures["function.call"][0] == function_call_name_node
161 | assert captures["function.args"][0] == function_call_args_node
162 |
163 | # ...with matches
164 | matches = query.matches(tree.root_node)
165 | assert len(matches) == 2
166 |
167 | # first match
168 | assert matches[0][1]["function.def"] == [function_name_node]
169 | assert matches[0][1]["function.block"] == [function_body_node]
170 |
171 | # second match
172 | assert matches[1][1]["function.call"] == [function_call_name_node]
173 | assert matches[1][1]["function.args"] == [function_call_args_node]
174 |
--------------------------------------------------------------------------------
/examples/walk_tree.py:
--------------------------------------------------------------------------------
1 | from typing import Generator
2 | from tree_sitter import Language, Parser, Tree, Node
3 | import tree_sitter_python
4 |
5 | PY_LANGUAGE = Language(tree_sitter_python.language())
6 |
7 | parser = Parser()
8 | parser.language = PY_LANGUAGE
9 |
10 | tree = parser.parse(bytes("a = 1", "utf8"))
11 |
12 |
def traverse_tree(tree: Tree) -> Generator[Node, None, None]:
    """Yield every node of ``tree`` in pre-order using a single tree cursor.

    Each node is yielded when the cursor first arrives at it; the cursor then
    tries to go to the first child, otherwise to the next sibling, otherwise
    back up to the parent. The walk ends when the cursor cannot move up.
    """
    cursor = tree.walk()

    descending = True
    while True:
        if descending:
            yield cursor.node
            # Keep descending only while there is a child to enter.
            descending = cursor.goto_first_child()
            continue
        if cursor.goto_next_sibling():
            descending = True
        elif not cursor.goto_parent():
            return
26 |
27 |
28 | node_names = map(lambda node: node.type, traverse_tree(tree))
29 |
30 | assert list(node_names) == [
31 | "module",
32 | "expression_statement",
33 | "assignment",
34 | "identifier",
35 | "=",
36 | "integer",
37 | ]
38 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=43"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "tree-sitter"
7 | version = "0.24.0"
8 | description = "Python bindings to the Tree-sitter parsing library"
9 | keywords = ["incremental", "parsing", "tree-sitter"]
10 | classifiers = [
11 | "Intended Audience :: Developers",
12 | "License :: OSI Approved :: MIT License",
13 | "Operating System :: OS Independent",
14 | "Programming Language :: C",
15 | "Programming Language :: Python",
16 | "Topic :: Software Development :: Compilers",
17 | "Topic :: Text Processing :: Linguistic",
18 | "Typing :: Typed",
19 | ]
20 | requires-python = ">=3.10"
21 | readme = "README.md"
22 |
23 | [project.urls]
24 | Homepage = "https://tree-sitter.github.io/tree-sitter/"
25 | Source = "https://github.com/tree-sitter/py-tree-sitter"
26 | Documentation = "https://tree-sitter.github.io/py-tree-sitter/"
27 | Discord = "https://discord.gg/w7nTvsVJhm"
28 | Matrix = "https://matrix.to/#/#tree-sitter-chat:matrix.org"
29 |
30 | [[project.authors]]
31 | name = "Max Brunsfeld"
32 | email = "maxbrunsfeld@gmail.com"
33 |
34 | [project.optional-dependencies]
35 | docs = ["sphinx~=8.1", "sphinx-book-theme"]
36 | tests = [
37 | "tree-sitter-html>=0.23.2",
38 | "tree-sitter-javascript>=0.23.1",
39 | "tree-sitter-json>=0.24.8",
40 | "tree-sitter-python>=0.23.6",
41 | "tree-sitter-rust>=0.23.2",
42 | ]
43 |
44 | [tool.ruff]
45 | target-version = "py310"
46 | line-length = 100
47 | indent-width = 4
48 | extend-exclude = [
49 | ".github",
50 | "__pycache__",
51 | "setup.py",
52 | "tree_sitter/core",
53 | ]
54 |
55 | [tool.ruff.format]
56 | quote-style = "double"
57 | indent-style = "space"
58 |
59 | [tool.cibuildwheel]
60 | build-frontend = "build"
61 | test-extras = ["tests"]
62 | test-command = "python -munittest discover -s {project}/tests"
63 |
64 | [tool.mypy]
65 | exclude = ["tree_sitter/core"]
66 |
67 | [tool.rstcheck]
68 | ignore_directives = [
69 | "autoclass",
70 | "autodata",
71 | "automethod",
72 | "autosummary"
73 | ]
74 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from platform import system
2 |
3 | from setuptools import Extension, setup # type: ignore
4 |
# Compiler flags: MSVC-style switches on Windows, GCC/Clang-style switches elsewhere.
if system() != "Windows":
    compile_args = [
        "-std=c11",
        "-fvisibility=hidden",
        "-Wno-cast-function-type",
        "-Werror=implicit-function-declaration",
    ]
else:
    compile_args = [
        "/std:c11",
        "/wd4244",
    ]

# The single C extension: the core library source plus the binding sources.
binding_extension = Extension(
    name="tree_sitter._binding",
    sources=[
        "tree_sitter/core/lib/src/lib.c",
        "tree_sitter/binding/language.c",
        "tree_sitter/binding/lookahead_iterator.c",
        "tree_sitter/binding/node.c",
        "tree_sitter/binding/parser.c",
        "tree_sitter/binding/query.c",
        "tree_sitter/binding/query_cursor.c",
        "tree_sitter/binding/query_predicates.c",
        "tree_sitter/binding/range.c",
        "tree_sitter/binding/tree.c",
        "tree_sitter/binding/tree_cursor.c",
        "tree_sitter/binding/module.c",
    ],
    include_dirs=[
        "tree_sitter/binding",
        "tree_sitter/core/lib/include",
        "tree_sitter/core/lib/src",
    ],
    define_macros=[
        ("_POSIX_C_SOURCE", "200112L"),
        ("_DEFAULT_SOURCE", None),
        ("PY_SSIZE_T_CLEAN", None),
        ("TREE_SITTER_HIDE_SYMBOLS", None),
    ],
    undef_macros=[
        "TREE_SITTER_FEATURE_WASM",
    ],
    extra_compile_args=compile_args,
)

setup(
    packages=["tree_sitter"],
    include_package_data=False,
    package_data={
        "tree_sitter": ["py.typed", "*.pyi"],
    },
    ext_modules=[binding_extension],
)
54 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tree-sitter/py-tree-sitter/52c190d29c67ab84bf71b3e1e873138cc2146f8a/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_language.py:
--------------------------------------------------------------------------------
1 | from sys import maxsize
2 | from typing import cast
3 | from unittest import TestCase
4 |
5 | from tree_sitter import Language, Query
6 |
7 | import tree_sitter_html
8 | import tree_sitter_javascript
9 | import tree_sitter_json
10 | import tree_sitter_python
11 | import tree_sitter_rust
12 |
13 |
class TestLanguage(TestCase):
    """Tests for constructing ``Language`` objects and querying their metadata."""

    def setUp(self):
        # Keep the raw objects returned by each grammar package; every test
        # wraps the one it needs in ``Language`` itself.
        self.html = tree_sitter_html.language()
        self.javascript = tree_sitter_javascript.language()
        self.json = tree_sitter_json.language()
        self.python = tree_sitter_python.language()
        self.rust = tree_sitter_rust.language()

    def test_init_invalid(self):
        """An arbitrary integer is rejected as a language pointer."""
        with self.assertRaises(ValueError):
            Language(42)

    def test_properties(self):
        python = Language(self.python)
        self.assertEqual(python.abi_version, 14)
        self.assertEqual(python.node_kind_count, 275)
        self.assertEqual(python.parse_state_count, 2809)
        self.assertEqual(python.field_count, 32)

    def test_node_kind_for_id(self):
        json = Language(self.json)
        self.assertEqual(json.node_kind_for_id(1), "{")
        self.assertEqual(json.node_kind_for_id(3), "}")

    def test_id_for_node_kind(self):
        json = Language(self.json)
        self.assertEqual(json.id_for_node_kind(":", False), 4)
        self.assertEqual(json.id_for_node_kind("string", True), 20)

    def test_node_kind_is_named(self):
        json = Language(self.json)
        self.assertFalse(json.node_kind_is_named(4))
        self.assertTrue(json.node_kind_is_named(20))

    def test_node_kind_is_visible(self):
        json = Language(self.json)
        self.assertTrue(json.node_kind_is_visible(2))

    def test_field_name_for_id(self):
        json = Language(self.json)
        self.assertEqual(json.field_name_for_id(1), "key")
        self.assertEqual(json.field_name_for_id(2), "value")

    def test_field_id_for_name(self):
        json = Language(self.json)
        self.assertEqual(json.field_id_for_name("key"), 1)
        self.assertEqual(json.field_id_for_name("value"), 2)

    def test_next_state(self):
        javascript = Language(self.javascript)
        self.assertNotEqual(javascript.next_state(1, 1), 0)

    def test_lookahead_iterator(self):
        javascript = Language(self.javascript)
        self.assertIsNotNone(javascript.lookahead_iterator(0))
        self.assertIsNone(javascript.lookahead_iterator(9999))

    def test_eq(self):
        """Wrappers compare equal exactly when they wrap the same language."""
        self.assertEqual(Language(self.json), Language(self.json))
        self.assertNotEqual(Language(self.rust), Language(self.html))

    def test_hash(self):
        # ``<<`` binds tighter than ``&``, so the mask is ``maxsize << 1``.
        mask = maxsize << 1
        for name in ["html", "javascript", "json", "python", "rust"]:
            with self.subTest(language=name):
                raw = cast(object, getattr(self, name))
                lang = Language(raw)
                self.assertGreater(hash(lang) & mask, 0)
79 |
--------------------------------------------------------------------------------
/tests/test_lookahead_iterator.py:
--------------------------------------------------------------------------------
1 | from typing import cast
2 | from unittest import TestCase
3 |
4 | from tree_sitter import Language, LookaheadIterator, Node, Parser
5 |
6 | import tree_sitter_rust
7 |
8 |
class TestLookaheadIterator(TestCase):
    """Tests for ``LookaheadIterator`` using the Rust grammar."""

    @classmethod
    def setUpClass(cls):
        cls.rust = Language(tree_sitter_rust.language())

    def test_lookahead_iterator(self):
        rust = self.rust
        tree = Parser(rust).parse(b"struct Stuff{}")
        cursor = tree.walk()

        self.assertEqual(cursor.goto_first_child(), True)  # struct
        self.assertEqual(cursor.goto_first_child(), True)  # struct keyword

        keyword = cast(Node, cursor.node)
        next_state = keyword.next_parse_state

        # The node's next parse state must agree with the language's state table.
        self.assertNotEqual(next_state, 0)
        self.assertEqual(next_state, rust.next_state(keyword.parse_state, keyword.grammar_id))
        self.assertLess(next_state, rust.parse_state_count)

        self.assertEqual(cursor.goto_next_sibling(), True)  # type_identifier
        type_name = cast(Node, cursor.node)
        self.assertEqual(next_state, type_name.parse_state)
        self.assertEqual(type_name.grammar_name, "identifier")
        self.assertNotEqual(type_name.grammar_id, type_name.kind_id)

        expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"]
        lookahead = cast(LookaheadIterator, rust.lookahead_iterator(next_state))
        self.assertEqual(lookahead.language, rust)
        self.assertListEqual(lookahead.names(), expected_symbols)

        # Reset to the same state and read the symbols as ids this time.
        lookahead.reset(next_state)
        symbol_names = [rust.node_kind_for_id(symbol) for symbol in lookahead.symbols()]
        self.assertListEqual(symbol_names, expected_symbols)

        # Reset with an explicit language and step the iterator once.
        lookahead.reset(next_state, rust)
        first_expected = (rust.id_for_node_kind("//", False), "//")
        self.assertTupleEqual(first_expected, next(lookahead))
50 |
--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
1 | from typing import cast
2 | from unittest import TestCase
3 |
4 | from tree_sitter import Language, LogType, Node, Parser, Range, Tree
5 |
6 | import tree_sitter_html
7 | import tree_sitter_javascript
8 | import tree_sitter_json
9 | import tree_sitter_python
10 | import tree_sitter_rust
11 |
12 |
def simple_range(start, end):
    """Build a ``Range`` on row 0 whose columns equal its byte offsets."""
    return Range(
        start_point=(0, start),
        end_point=(0, end),
        start_byte=start,
        end_byte=end,
    )
15 |
16 |
17 | class TestParser(TestCase):
18 | @classmethod
19 | def setUpClass(cls):
20 | cls.html = Language(tree_sitter_html.language())
21 | cls.python = Language(tree_sitter_python.language())
22 | cls.javascript = Language(tree_sitter_javascript.language())
23 | cls.json = Language(tree_sitter_json.language())
24 | cls.rust = Language(tree_sitter_rust.language())
25 | cls.max_range = Range((0, 0), (0xFFFFFFFF, 0xFFFFFFFF), 0, 0xFFFFFFFF)
26 | cls.min_range = Range((0, 0), (0, 1), 0, 1)
27 | cls.timeout = 1000
28 |
29 | def test_init_no_args(self):
30 | parser = Parser()
31 | self.assertIsNone(parser.language)
32 | self.assertListEqual(parser.included_ranges, [self.max_range])
33 |
34 | def test_init_args(self):
35 | parser = Parser(
36 | language=self.python, included_ranges=[self.min_range]
37 | )
38 | self.assertEqual(parser.language, self.python)
39 | self.assertListEqual(parser.included_ranges, [self.min_range])
40 |
41 | def test_setters(self):
42 | parser = Parser()
43 |
44 | with self.subTest(setter="language"):
45 | parser.language = self.python
46 | self.assertEqual(parser.language, self.python)
47 |
48 | with self.subTest(setter="included_ranges"):
49 | parser.included_ranges = [self.min_range]
50 | self.assertListEqual(parser.included_ranges, [self.min_range])
51 | with self.assertRaises(ValueError):
52 | parser.included_ranges = [
53 | Range(
54 | start_byte=23,
55 | end_byte=29,
56 | start_point=(0, 23),
57 | end_point=(0, 29),
58 | ),
59 | Range(
60 | start_byte=0,
61 | end_byte=5,
62 | start_point=(0, 0),
63 | end_point=(0, 5),
64 | ),
65 | Range(
66 | start_byte=50,
67 | end_byte=60,
68 | start_point=(0, 50),
69 | end_point=(0, 60),
70 | ),
71 | ]
72 | with self.assertRaises(ValueError):
73 | parser.included_ranges = [
74 | Range(
75 | start_byte=10,
76 | end_byte=5,
77 | start_point=(0, 10),
78 | end_point=(0, 5),
79 | )
80 | ]
81 |
82 | with self.subTest(setter="logger"):
83 | def logger(log_type, message):
84 | print(log_type.name, message)
85 |
86 | parser.logger = logger
87 | self.assertEqual(parser.logger, logger)
88 |
89 | def test_deleters(self):
90 | parser = Parser()
91 |
92 | with self.subTest(deleter="language"):
93 | del parser.language
94 | self.assertIsNone(parser.language)
95 |
96 | with self.subTest(deleter="included_ranges"):
97 | del parser.included_ranges
98 | self.assertListEqual(parser.included_ranges, [self.max_range])
99 |
100 | with self.subTest(deleter="logger"):
101 | del parser.logger
102 | self.assertEqual(parser.logger, None)
103 |
104 | def test_parse_buffer(self):
105 | parser = Parser(self.javascript)
106 | with self.subTest(type="bytes"):
107 | self.assertIsInstance(parser.parse(b"test"), Tree)
108 | with self.subTest(type="memoryview"):
109 | self.assertIsInstance(parser.parse(memoryview(b"test")), Tree)
110 | with self.subTest(type="bytearray"):
111 | self.assertIsInstance(parser.parse(bytearray(b"test")), Tree)
112 |
113 | def test_parse_callback(self):
114 | parser = Parser(self.python)
115 | source_lines = ["def foo():\n", " bar()"]
116 |
117 | def read_callback(_, point):
118 | row, column = point
119 | if row >= len(source_lines):
120 | return None
121 | if column >= len(source_lines[row]):
122 | return None
123 | return source_lines[row][column:].encode("utf8")
124 |
125 | tree = parser.parse(read_callback)
126 | self.assertEqual(
127 | str(tree.root_node),
128 | "(module (function_definition"
129 | + " name: (identifier)"
130 | + " parameters: (parameters)"
131 | + " body: (block (expression_statement (call"
132 | + " function: (identifier)"
133 | + " arguments: (argument_list))))))",
134 | )
135 |
136 | def test_parse_utf16_encoding(self):
137 | source_code = bytes("'😎' && '🐍'", "utf16")
138 | parser = Parser(self.javascript)
139 |
140 | def read(byte_position, _):
141 | return source_code[byte_position : byte_position + 2]
142 |
143 | tree = parser.parse(read, encoding="utf16")
144 | root_node = tree.root_node
145 | snake_node = root_node.children[0].children[0].children[2]
146 | snake = source_code[snake_node.start_byte + 2 : snake_node.end_byte - 2]
147 |
148 | self.assertEqual(snake_node.type, "string")
149 | self.assertEqual(snake.decode("utf16"), "🐍")
150 | self.assertIs(tree.language, self.javascript)
151 |
152 | def test_parse_invalid_encoding(self):
153 | parser = Parser(self.python)
154 | with self.assertRaises(ValueError):
155 | parser.parse(b"foo", encoding="ascii") # pyright: ignore
156 |
157 | def test_parse_with_one_included_range(self):
158 | source_code = b"hi"
159 | parser = Parser(self.html)
160 | html_tree = parser.parse(source_code)
161 | script_content_node = cast(Node, html_tree.root_node.child(1)).child(1)
162 | self.assertIsNotNone(script_content_node)
163 | script_content_node = cast(Node, script_content_node)
164 | self.assertEqual(script_content_node.type, "raw_text")
165 |
166 | parser.included_ranges = [script_content_node.range]
167 | parser.language = self.javascript
168 | js_tree = parser.parse(source_code)
169 | self.assertEqual(
170 | str(js_tree.root_node),
171 | "(program (expression_statement (call_expression"
172 | + " function: (member_expression object: (identifier) property: (property_identifier))"
173 | + " arguments: (arguments (string (string_fragment))))))",
174 | )
175 | self.assertEqual(js_tree.root_node.start_point, (0, source_code.index(b"console")))
176 | self.assertEqual(js_tree.included_ranges, [script_content_node.range])
177 |
178 | def test_parse_with_multiple_included_ranges(self):
179 | source_code = b"html `
Hello, ${name.toUpperCase()}, it's ${now()}.
`"
180 |
181 | parser = Parser(self.javascript)
182 | js_tree = parser.parse(source_code)
183 | template_string_node = js_tree.root_node.descendant_for_byte_range(
184 | source_code.index(b"`<"), source_code.index(b">`")
185 | )
186 | self.assertIsNotNone(template_string_node)
187 | template_string_node = cast(Node, template_string_node)
188 |
189 | self.assertEqual(template_string_node.type, "template_string")
190 |
191 | open_quote_node = cast(Node, template_string_node.child(0))
192 | self.assertIsNotNone(open_quote_node)
193 | interpolation_node1 = cast(Node, template_string_node.child(2))
194 | self.assertIsNotNone(interpolation_node1)
195 | interpolation_node2 = cast(Node, template_string_node.child(4))
196 | self.assertIsNotNone(interpolation_node2)
197 | close_quote_node = cast(Node, template_string_node.child(6))
198 | self.assertIsNotNone(close_quote_node)
199 |
200 | html_ranges = [
201 | Range(
202 | start_byte=open_quote_node.end_byte,
203 | start_point=open_quote_node.end_point,
204 | end_byte=interpolation_node1.start_byte,
205 | end_point=interpolation_node1.start_point,
206 | ),
207 | Range(
208 | start_byte=interpolation_node1.end_byte,
209 | start_point=interpolation_node1.end_point,
210 | end_byte=interpolation_node2.start_byte,
211 | end_point=interpolation_node2.start_point,
212 | ),
213 | Range(
214 | start_byte=interpolation_node2.end_byte,
215 | start_point=interpolation_node2.end_point,
216 | end_byte=close_quote_node.start_byte,
217 | end_point=close_quote_node.start_point,
218 | ),
219 | ]
220 | parser.included_ranges = html_ranges
221 | parser.language = self.html
222 | html_tree = parser.parse(source_code)
223 |
224 | self.assertEqual(
225 | str(html_tree.root_node),
226 | "(document (element"
227 | + " (start_tag (tag_name))"
228 | + " (text)"
229 | + " (element (start_tag (tag_name)) (end_tag (tag_name)))"
230 | + " (text)"
231 | + " (end_tag (tag_name))))",
232 | )
233 | self.assertEqual(html_tree.included_ranges, html_ranges)
234 |
235 | div_element_node = cast(Node, html_tree.root_node.child(0))
236 | self.assertIsNotNone(div_element_node)
237 | hello_text_node = cast(Node, div_element_node.child(1))
238 | self.assertIsNotNone(hello_text_node)
239 | b_element_node = cast(Node, div_element_node.child(2))
240 | self.assertIsNotNone(b_element_node)
241 | b_start_tag_node = cast(Node, b_element_node.child(0))
242 | self.assertIsNotNone(b_start_tag_node)
243 | b_end_tag_node = cast(Node, b_element_node.child(1))
244 | self.assertIsNotNone(b_end_tag_node)
245 |
246 | self.assertEqual(hello_text_node.type, "text")
247 | self.assertEqual(hello_text_node.start_byte, source_code.index(b"Hello"))
248 | self.assertEqual(hello_text_node.end_byte, source_code.index(b" "))
249 |
250 | self.assertEqual(b_start_tag_node.type, "start_tag")
251 | self.assertEqual(b_start_tag_node.start_byte, source_code.index(b""))
252 | self.assertEqual(b_start_tag_node.end_byte, source_code.index(b"${now()}"))
253 |
254 | self.assertEqual(b_end_tag_node.type, "end_tag")
255 | self.assertEqual(b_end_tag_node.start_byte, source_code.index(b""))
256 | self.assertEqual(b_end_tag_node.end_byte, source_code.index(b"."))
257 |
258 | def test_parse_with_included_range_containing_mismatched_positions(self):
259 | source_code = b"test
{_ignore_this_part_}"
260 | end_byte = source_code.index(b"{_ignore_this_part_")
261 |
262 | range_to_parse = Range(
263 | start_byte=0,
264 | start_point=(10, 12),
265 | end_byte=end_byte,
266 | end_point=(10, 12 + end_byte),
267 | )
268 |
269 | parser = Parser(self.html, included_ranges=[range_to_parse])
270 | html_tree = parser.parse(source_code)
271 |
272 | self.assertEqual(
273 | str(html_tree.root_node),
274 | "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))",
275 | )
276 |
277 | def test_parse_with_included_range_boundaries(self):
278 | source_code = b"a <%= b() %> c <% d() %>"
279 | range1_start_byte = source_code.index(b" b() ")
280 | range1_end_byte = range1_start_byte + len(b" b() ")
281 | range2_start_byte = source_code.index(b" d() ")
282 | range2_end_byte = range2_start_byte + len(b" d() ")
283 |
284 | parser = Parser(
285 | self.javascript,
286 | included_ranges=[
287 | Range(
288 | start_byte=range1_start_byte,
289 | end_byte=range1_end_byte,
290 | start_point=(0, range1_start_byte),
291 | end_point=(0, range1_end_byte),
292 | ),
293 | Range(
294 | start_byte=range2_start_byte,
295 | end_byte=range2_end_byte,
296 | start_point=(0, range2_start_byte),
297 | end_point=(0, range2_end_byte),
298 | ),
299 | ],
300 | )
301 |
302 | tree = parser.parse(source_code)
303 | root = tree.root_node
304 | statement1 = cast(Node, root.child(0))
305 | self.assertIsNotNone(statement1)
306 | statement2 = cast(Node, root.child(1))
307 | self.assertIsNotNone(statement2)
308 |
309 | self.assertEqual(
310 | str(root),
311 | "(program"
312 | + " (expression_statement (call_expression"
313 | + " function: (identifier) arguments: (arguments)))"
314 | + " (expression_statement (call_expression"
315 | + " function: (identifier) arguments: (arguments))))",
316 | )
317 |
318 | self.assertEqual(statement1.start_byte, source_code.index(b"b()"))
319 | self.assertEqual(statement1.end_byte, source_code.find(b" %> c"))
320 | self.assertEqual(statement2.start_byte, source_code.find(b"d()"))
321 | self.assertEqual(statement2.end_byte, len(source_code) - len(" %>"))
322 |
323 | def test_parse_with_a_newly_excluded_range(self):
324 | source_code = b"<%= something %>
"
325 |
326 | # Parse HTML including the template directive, which will cause an error
327 | parser = Parser(self.html)
328 | first_tree = parser.parse(source_code)
329 |
330 | prefix = b"a very very long line of plain text. "
331 | first_tree.edit(
332 | start_byte=0,
333 | old_end_byte=0,
334 | new_end_byte=len(prefix),
335 | start_point=(0, 0),
336 | old_end_point=(0, 0),
337 | new_end_point=(0, len(prefix)),
338 | )
339 | source_code = prefix + source_code
340 |
341 | # Parse the HTML again, this time *excluding* the template directive
342 | # (which has moved since the previous parse).
343 | directive_start = source_code.index(b"<%=")
344 | directive_end = source_code.index(b"")
345 | source_code_end = len(source_code)
346 | parser.included_ranges = [
347 | Range(
348 | start_byte=0,
349 | end_byte=directive_start,
350 | start_point=(0, 0),
351 | end_point=(0, directive_start),
352 | ),
353 | Range(
354 | start_byte=directive_end,
355 | end_byte=source_code_end,
356 | start_point=(0, directive_end),
357 | end_point=(0, source_code_end),
358 | ),
359 | ]
360 |
361 | tree = parser.parse(source_code, first_tree)
362 |
363 | self.assertEqual(
364 | str(tree.root_node),
365 | "(document (text) (element"
366 | + " (start_tag (tag_name))"
367 | + " (element (start_tag (tag_name)) (end_tag (tag_name)))"
368 | + " (end_tag (tag_name))))",
369 | )
370 |
371 | self.assertEqual(
372 | tree.changed_ranges(first_tree),
373 | [
374 | # The first range that has changed syntax is the range of the newly-inserted text.
375 | Range(
376 | start_byte=0,
377 | end_byte=len(prefix),
378 | start_point=(0, 0),
379 | end_point=(0, len(prefix)),
380 | ),
381 | # Even though no edits were applied to the outer `div` element,
382 | # its contents have changed syntax because a range of text that
383 | # was previously included is now excluded.
384 | Range(
385 | start_byte=directive_start,
386 | end_byte=directive_end,
387 | start_point=(0, directive_start),
388 | end_point=(0, directive_end),
389 | ),
390 | ],
391 | )
392 |
393 | def test_parsing_with_a_newly_included_range(self):
394 | source_code = b"<%= foo() %>
<%= bar() %><%= baz() %>"
395 | range1_start = source_code.index(b" foo")
396 | range2_start = source_code.index(b" bar")
397 | range3_start = source_code.index(b" baz")
398 | range1_end = range1_start + 7
399 | range2_end = range2_start + 7
400 | range3_end = range3_start + 7
401 |
402 | # Parse only the first code directive as JavaScript
403 | parser = Parser(self.javascript)
404 | parser.included_ranges = [simple_range(range1_start, range1_end)]
405 | tree = parser.parse(source_code)
406 | self.assertEqual(
407 | str(tree.root_node),
408 | "(program"
409 | + " (expression_statement (call_expression"
410 | + " function: (identifier) arguments: (arguments))))",
411 | )
412 |
413 | # Parse both the first and third code directives as JavaScript, using the old tree as a
414 | # reference.
415 | parser.included_ranges = [
416 | simple_range(range1_start, range1_end),
417 | simple_range(range3_start, range3_end),
418 | ]
419 | tree2 = parser.parse(source_code)
420 | self.assertEqual(
421 | str(tree2.root_node),
422 | "(program"
423 | + " (expression_statement (call_expression"
424 | + " function: (identifier) arguments: (arguments)))"
425 | + " (expression_statement (call_expression"
426 | + " function: (identifier) arguments: (arguments))))",
427 | )
428 | self.assertEqual(tree2.changed_ranges(tree), [simple_range(range1_end, range3_end)])
429 |
430 | # Parse all three code directives as JavaScript, using the old tree as a
431 | # reference.
432 | parser.included_ranges = [
433 | simple_range(range1_start, range1_end),
434 | simple_range(range2_start, range2_end),
435 | simple_range(range3_start, range3_end),
436 | ]
437 | tree3 = parser.parse(source_code)
438 | self.assertEqual(
439 | str(tree3.root_node),
440 | "(program"
441 | + " (expression_statement (call_expression"
442 | + " function: (identifier) arguments: (arguments)))"
443 | + " (expression_statement (call_expression"
444 | + " function: (identifier) arguments: (arguments)))"
445 | + " (expression_statement (call_expression"
446 | + " function: (identifier) arguments: (arguments))))",
447 | )
448 | self.assertEqual(
449 | tree3.changed_ranges(tree2),
450 | [simple_range(range2_start + 1, range2_end - 1)],
451 | )
452 |
453 | def test_logging(self):
454 | from logging import getLogger
455 |
456 | def logger(log_type: LogType, message: str):
457 | match log_type:
458 | case LogType.PARSE:
459 | parse_logger.info(message)
460 | case LogType.LEX:
461 | lex_logger.info(message)
462 |
463 | parse_logger = getLogger("tree_sitter.PARSE")
464 | lex_logger = getLogger("tree_sitter.LEX")
465 | parser = Parser(self.python, logger=logger)
466 | with self.assertLogs("tree_sitter") as logs:
467 | parser.parse(b"foo")
468 |
469 | self.assertEqual(logs.records[0].name, "tree_sitter.PARSE")
470 | self.assertEqual(logs.records[0].message, "new_parse")
471 | self.assertEqual(logs.records[3].name, "tree_sitter.LEX")
472 | self.assertEqual(logs.records[3].message, "consume character:'f'")
473 |
474 | def test_dot_graphs(self):
475 | from tempfile import TemporaryFile
476 |
477 | new_parse = ["graph {\n", 'label="new_parse"\n', "}\n"]
478 | parser = Parser(self.python)
479 | with TemporaryFile("w+") as f:
480 | parser.print_dot_graphs(f)
481 | parser.parse(b"foo")
482 | f.seek(0)
483 | lines = [f.readline(), f.readline(), f.readline()]
484 | self.assertListEqual(lines, new_parse)
485 |
--------------------------------------------------------------------------------
/tests/test_query.py:
--------------------------------------------------------------------------------
1 | from re import error as RegexError
2 | from unittest import TestCase
3 |
4 | import tree_sitter_python
5 | import tree_sitter_javascript
6 |
7 | from tree_sitter import Language, Parser, Query, QueryCursor, QueryError
8 |
9 |
10 | def collect_matches(matches):
11 | return [(m[0], format_captures(m[1])) for m in matches]
12 |
13 |
14 | def format_captures(captures):
15 | return [(name, format_capture(capture)) for name, capture in captures.items()]
16 |
17 |
18 | def format_capture(capture):
19 | return [n.text.decode("utf-8") for n in capture]
20 |
21 |
22 | class TestQuery(TestCase):
23 | @classmethod
24 | def setUpClass(cls):
25 | cls.javascript = Language(tree_sitter_javascript.language())
26 | cls.python = Language(tree_sitter_python.language())
27 |
28 | def assert_query_matches(self, language, query, source, expected):
29 | parser = Parser(language)
30 | tree = parser.parse(source)
31 | cursor = QueryCursor(Query(language, query))
32 | matches = cursor.matches(tree.root_node)
33 | self.assertListEqual(collect_matches(matches), expected)
34 |
35 | def test_errors(self):
36 | with self.assertRaises(QueryError):
37 | Query(self.python, "(list (foo))")
38 | with self.assertRaises(QueryError):
39 | Query(self.python, "(function_definition buzz: (identifier))")
40 | with self.assertRaises(QueryError):
41 | Query(self.python, "((function_definition) (#eq? @garbage foo))")
42 | with self.assertRaises(QueryError):
43 | Query(self.python, "(list))")
44 |
45 | def test_matches_with_simple_pattern(self):
46 | self.assert_query_matches(
47 | self.javascript,
48 | "(function_declaration name: (identifier) @fn-name)",
49 | b"function one() { two(); function three() {} }",
50 | [(0, [("fn-name", ["one"])]), (0, [("fn-name", ["three"])])],
51 | )
52 |
53 | def test_matches_with_multiple_on_same_root(self):
54 | self.assert_query_matches(
55 | self.javascript,
56 | """
57 | (class_declaration
58 | name: (identifier) @the-class-name
59 | (class_body
60 | (method_definition
61 | name: (property_identifier) @the-method-name)))
62 | """,
63 | b"""
64 | class Person {
65 | // the constructor
66 | constructor(name) { this.name = name; }
67 |
68 | // the getter
69 | getFullName() { return this.name; }
70 | }
71 | """,
72 | [
73 | (0, [("the-class-name", ["Person"]), ("the-method-name", ["constructor"])]),
74 | (0, [("the-class-name", ["Person"]), ("the-method-name", ["getFullName"])]),
75 | ],
76 | )
77 |
78 | def test_matches_with_multiple_patterns_different_roots(self):
79 | self.assert_query_matches(
80 | self.javascript,
81 | """
82 | (function_declaration name: (identifier) @fn-def)
83 | (call_expression function: (identifier) @fn-ref)
84 | """,
85 | b"""
86 | function f1() {
87 | f2(f3());
88 | }
89 | """,
90 | [
91 | (0, [("fn-def", ["f1"])]),
92 | (1, [("fn-ref", ["f2"])]),
93 | (1, [("fn-ref", ["f3"])]),
94 | ],
95 | )
96 |
97 | def test_matches_with_nesting_and_no_fields(self):
98 | self.assert_query_matches(
99 | self.javascript,
100 | "(array (array (identifier) @x1 (identifier) @x2))",
101 | b"""
102 | [[a]];
103 | [[c, d], [e, f, g, h]];
104 | [[h], [i]];
105 | """,
106 | [
107 | (0, [("x1", ["c"]), ("x2", ["d"])]),
108 | (0, [("x1", ["e"]), ("x2", ["f"])]),
109 | (0, [("x1", ["e"]), ("x2", ["g"])]),
110 | (0, [("x1", ["f"]), ("x2", ["g"])]),
111 | (0, [("x1", ["e"]), ("x2", ["h"])]),
112 | (0, [("x1", ["f"]), ("x2", ["h"])]),
113 | (0, [("x1", ["g"]), ("x2", ["h"])]),
114 | ],
115 | )
116 |
117 | def test_matches_with_list_capture(self):
118 | self.assert_query_matches(
119 | self.javascript,
120 | """
121 | (function_declaration
122 | name: (identifier) @fn-name
123 | body: (statement_block (_)* @fn-statements))
124 | """,
125 | b"""function one() {
126 | x = 1;
127 | y = 2;
128 | z = 3;
129 | }
130 | function two() {
131 | x = 1;
132 | }
133 | """,
134 | [
135 | (
136 | 0,
137 | [
138 | ("fn-name", ["one"]),
139 | ("fn-statements", ["x = 1;", "y = 2;", "z = 3;"]),
140 | ],
141 | ),
142 | (0, [("fn-name", ["two"]), ("fn-statements", ["x = 1;"])]),
143 | ],
144 | )
145 |
146 | def test_captures(self):
147 | parser = Parser(self.python)
148 | source = b"def foo():\n bar()\ndef baz():\n quux()\n"
149 | tree = parser.parse(source)
150 | query = Query(
151 | self.python,
152 | """
153 | (function_definition name: (identifier) @func-def)
154 | (call function: (identifier) @func-call)
155 | """,
156 | )
157 |
158 | cursor = QueryCursor(query)
159 | captures = list(cursor.captures(tree.root_node).items())
160 |
161 | self.assertEqual(captures[0][0], "func-def")
162 | self.assertEqual(captures[0][1][0].start_point, (0, 4))
163 | self.assertEqual(captures[0][1][0].end_point, (0, 7))
164 | self.assertEqual(captures[0][1][1].start_point, (2, 4))
165 | self.assertEqual(captures[0][1][1].end_point, (2, 7))
166 |
167 | self.assertEqual(captures[1][0], "func-call")
168 | self.assertEqual(captures[1][1][0].start_point, (1, 2))
169 | self.assertEqual(captures[1][1][0].end_point, (1, 5))
170 | self.assertEqual(captures[1][1][1].start_point, (3, 2))
171 | self.assertEqual(captures[1][1][1].end_point, (3, 6))
172 |
173 | def test_text_predicates(self):
174 | parser = Parser(self.javascript)
175 | source = b"""
176 | keypair_object = {
177 | key1: value1,
178 | equal: equal
179 | }
180 |
181 | function fun1(arg) {
182 | return 1;
183 | }
184 |
185 | function fun2(arg) {
186 | return 2;
187 | }
188 | """
189 | tree = parser.parse(source)
190 | root_node = tree.root_node
191 |
192 | # function with name equal to 'fun1' -> test for #eq? @capture string
193 | query1 = Query(
194 | self.javascript,
195 | """
196 | ((function_declaration
197 | name: (identifier) @function-name)
198 | (#eq? @function-name fun1))
199 | """
200 | )
201 | cursor = QueryCursor(query1)
202 | captures1 = list(cursor.captures(root_node).items())
203 | self.assertEqual(1, len(captures1))
204 | self.assertEqual(captures1[0][0], "function-name")
205 | self.assertEqual(captures1[0][1][0].text, b"fun1")
206 |
207 | # functions with name not equal to 'fun1' -> test for #not-eq? @capture string
208 | query2 = Query(
209 | self.javascript,
210 | """
211 | ((function_declaration
212 | name: (identifier) @function-name)
213 | (#not-eq? @function-name fun1))
214 | """
215 | )
216 | cursor = QueryCursor(query2)
217 | captures2 = list(cursor.captures(root_node).items())
218 | self.assertEqual(1, len(captures2))
219 | self.assertEqual(captures2[0][0], "function-name")
220 | self.assertEqual(captures2[0][1][0].text, b"fun2")
221 |
222 | def test_text_predicates_with_callback(self):
223 | parser = Parser(self.javascript)
224 | source = b"""
225 | keypair_object = {
226 | key1: value1,
227 | equal: equal
228 | }
229 |
230 | function fun1(arg) {
231 | return 1;
232 | }
233 |
234 | function fun2(arg) {
235 | return 2;
236 | }
237 |
238 | function fun3(arg) {
239 | return 3;
240 | }
241 | """
242 |
243 | def read_callable_byte_offset(byte_offset, point):
244 | return source[byte_offset: byte_offset + 1]
245 |
246 | def read_callable_point(byte_offset, point):
247 | row, col = point
248 | lines = source.split(b"\n")
249 | if row >= len(lines):
250 | return b""
251 | line = lines[row]
252 | if col >= len(line):
253 | return b"\n"
254 | return line[col:col + 1]
255 |
256 | tree1 = parser.parse(read_callable_byte_offset)
257 | root_node1 = tree1.root_node
258 | tree2 = parser.parse(read_callable_point)
259 | root_node2 = tree2.root_node
260 |
261 | # function with name equal to 'fun1' -> test for #eq? @capture string
262 | query1 = Query(
263 | self.javascript,
264 | """
265 | ((function_declaration
266 | name: (identifier) @function-name)
267 | (#match? @function-name "fun[12]"))
268 | """
269 | )
270 | cursor1 = QueryCursor(query1)
271 | captures1 = cursor1.captures(root_node1)
272 | self.assertEqual(1, len(captures1))
273 | self.assertIn("function-name", captures1)
274 | self.assertEqual(2, len(captures1["function-name"]))
275 | self.assertEqual(captures1["function-name"][0].text, b"fun1")
276 | self.assertEqual(captures1["function-name"][1].text, b"fun2")
277 |
278 | captures2 = cursor1.captures(root_node2)
279 | self.assertEqual(1, len(captures2))
280 | self.assertIn("function-name", captures2)
281 | self.assertEqual(2, len(captures2["function-name"]))
282 | self.assertEqual(captures2["function-name"][0].text, b"fun1")
283 | self.assertEqual(captures2["function-name"][1].text, b"fun2")
284 |
285 | # functions with name not equal to 'fun1' -> test for #not-eq? @capture string
286 | query2 = Query(
287 | self.javascript,
288 | """
289 | ((function_declaration
290 | name: (identifier) @function-name)
291 | (#not-eq? @function-name fun1))
292 | """
293 | )
294 | cursor2 = QueryCursor(query2)
295 | captures3 = cursor2.captures(root_node1)
296 | self.assertEqual(1, len(captures3))
297 | self.assertIn("function-name", captures3)
298 | self.assertEqual(2, len(captures3["function-name"]))
299 | self.assertEqual(captures3["function-name"][0].text, b"fun2")
300 | self.assertEqual(captures3["function-name"][1].text, b"fun3")
301 |
302 | captures4 = cursor2.captures(root_node2)
303 | self.assertEqual(1, len(captures4))
304 | self.assertIn("function-name", captures4)
305 | self.assertEqual(2, len(captures4["function-name"]))
306 | self.assertEqual(captures4["function-name"][0].text, b"fun2")
307 | self.assertEqual(captures4["function-name"][1].text, b"fun3")
308 |
309 | def test_text_predicates_errors(self):
310 | with self.assertRaises(QueryError):
311 | Query(
312 | self.javascript,
313 | """
314 | ((function_declaration
315 | name: (identifier) @function-name)
316 | (#eq? @function-name @function-name fun1))
317 | """
318 | )
319 |
320 | with self.assertRaises(QueryError):
321 | Query(
322 | self.javascript,
323 | """
324 | ((function_declaration
325 | name: (identifier) @function-name)
326 | (#eq? fun1 @function-name))
327 | """
328 | )
329 |
330 | with self.assertRaises(QueryError):
331 | Query(
332 | self.javascript,
333 | """
334 | ((function_declaration
335 | name: (identifier) @function-name)
336 | (#match? @function-name @function-name fun1))
337 | """
338 | )
339 |
340 | with self.assertRaises(QueryError):
341 | Query(
342 | self.javascript,
343 | """
344 | ((function_declaration
345 | name: (identifier) @function-name)
346 | (#match? fun1 @function-name))
347 | """
348 | )
349 |
350 | with self.assertRaises(QueryError):
351 | Query(
352 | self.javascript,
353 | """
354 | ((function_declaration
355 | name: (identifier) @function-name)
356 | (#match? @function-name @function-name))
357 | """
358 | )
359 |
360 | with self.assertRaises(QueryError) as ctx:
361 | Query(
362 | self.javascript,
363 | """
364 | ((function_declaration
365 | name: (identifier) @function-name)
366 | (#match? @function-name "?"))
367 | """
368 | )
369 | self.assertEqual(
370 | str(ctx.exception), "Invalid predicate in pattern at row 1: regular expression error"
371 | )
372 | self.assertIsInstance(ctx.exception.__cause__, RegexError)
373 |
374 | def test_point_range_captures(self):
375 | parser = Parser(self.python)
376 | source = b"def foo():\n bar()\ndef baz():\n quux()\n"
377 | tree = parser.parse(source)
378 | query = Query(
379 | self.python,
380 | """
381 | (function_definition name: (identifier) @func-def)
382 | (call function: (identifier) @func-call)
383 | """
384 | )
385 | cursor = QueryCursor(query)
386 | cursor.set_point_range((1, 0), (2, 0))
387 |
388 | captures = list(cursor.captures(tree.root_node).items())
389 |
390 | self.assertEqual(captures[0][0], "func-call")
391 | self.assertEqual(captures[0][1][0].start_point, (1, 2))
392 | self.assertEqual(captures[0][1][0].end_point, (1, 5))
393 |
394 | def test_byte_range_captures(self):
395 | parser = Parser(self.python)
396 | source = b"def foo():\n bar()\ndef baz():\n quux()\n"
397 | tree = parser.parse(source)
398 | query = Query(
399 | self.python,
400 | """
401 | (function_definition name: (identifier) @func-def)
402 | (call function: (identifier) @func-call)
403 | """
404 | )
405 | cursor = QueryCursor(query)
406 | cursor.set_byte_range(10, 20)
407 |
408 | captures = list(cursor.captures(tree.root_node).items())
409 | self.assertEqual(captures[0][0], "func-call")
410 | self.assertEqual(captures[0][1][0].start_point, (1, 2))
411 | self.assertEqual(captures[0][1][0].end_point, (1, 5))
412 |
--------------------------------------------------------------------------------
/tests/test_tree.py:
--------------------------------------------------------------------------------
1 | from typing import cast
2 | from unittest import TestCase
3 |
4 | from tree_sitter import Language, Node, Parser
5 |
6 | import tree_sitter_python
7 | import tree_sitter_rust
8 |
9 |
10 | class TestTree(TestCase):
11 | @classmethod
12 | def setUpClass(cls):
13 | cls.python = Language(tree_sitter_python.language())
14 | cls.rust = Language(tree_sitter_rust.language())
15 |
16 | def test_edit(self):
17 | parser = Parser(self.python)
18 | tree = parser.parse(b"def foo():\n bar()")
19 |
20 | edit_offset = len(b"def foo(")
21 | tree.edit(
22 | start_byte=edit_offset,
23 | old_end_byte=edit_offset,
24 | new_end_byte=edit_offset + 2,
25 | start_point=(0, edit_offset),
26 | old_end_point=(0, edit_offset),
27 | new_end_point=(0, edit_offset + 2),
28 | )
29 |
30 | fn_node = tree.root_node.children[0]
31 | self.assertEqual(fn_node.type, "function_definition")
32 | self.assertTrue(fn_node.has_changes)
33 | self.assertFalse(fn_node.children[0].has_changes)
34 | self.assertFalse(fn_node.children[1].has_changes)
35 | self.assertFalse(fn_node.children[3].has_changes)
36 |
37 | params_node = fn_node.children[2]
38 | self.assertEqual(params_node.type, "parameters")
39 | self.assertTrue(params_node.has_changes)
40 | self.assertEqual(params_node.start_point, (0, edit_offset - 1))
41 | self.assertEqual(params_node.end_point, (0, edit_offset + 3))
42 |
43 | new_tree = parser.parse(b"def foo(ab):\n bar()", tree)
44 | self.assertEqual(
45 | str(new_tree.root_node),
46 | "(module (function_definition"
47 | + " name: (identifier)"
48 | + " parameters: (parameters (identifier))"
49 | + " body: (block"
50 | + " (expression_statement (call"
51 | + " function: (identifier)"
52 | + " arguments: (argument_list))))))",
53 | )
54 |
55 | def test_changed_ranges(self):
56 | parser = Parser(self.python)
57 | tree = parser.parse(b"def foo():\n bar()")
58 |
59 | edit_offset = len(b"def foo(")
60 | tree.edit(
61 | start_byte=edit_offset,
62 | old_end_byte=edit_offset,
63 | new_end_byte=edit_offset + 2,
64 | start_point=(0, edit_offset),
65 | old_end_point=(0, edit_offset),
66 | new_end_point=(0, edit_offset + 2),
67 | )
68 |
69 | new_tree = parser.parse(b"def foo(ab):\n bar()", tree)
70 | changed_ranges = tree.changed_ranges(new_tree)
71 |
72 | self.assertEqual(len(changed_ranges), 1)
73 | self.assertEqual(changed_ranges[0].start_byte, edit_offset)
74 | self.assertEqual(changed_ranges[0].start_point, (0, edit_offset))
75 | self.assertEqual(changed_ranges[0].end_byte, edit_offset + 2)
76 | self.assertEqual(changed_ranges[0].end_point, (0, edit_offset + 2))
77 |
78 | def test_walk(self):
79 | parser = Parser(self.rust)
80 |
81 | tree = parser.parse(
82 | b"""
83 | struct Stuff {
84 | a: A,
85 | b: Option,
86 | }
87 | """
88 | )
89 |
90 | cursor = tree.walk()
91 |
92 | # Node always returns the same instance
93 | self.assertIs(cursor.node, cursor.node)
94 |
95 | self.assertEqual(cast(Node, cursor.node).type, "source_file")
96 |
97 | self.assertEqual(cursor.goto_first_child(), True)
98 | self.assertEqual(cast(Node, cursor.node).type, "struct_item")
99 |
100 | self.assertEqual(cursor.goto_first_child(), True)
101 | self.assertEqual(cast(Node, cursor.node).type, "struct")
102 | self.assertEqual(cast(Node, cursor.node).is_named, False)
103 |
104 | self.assertEqual(cursor.goto_next_sibling(), True)
105 | self.assertEqual(cast(Node, cursor.node).type, "type_identifier")
106 | self.assertEqual(cast(Node, cursor.node).is_named, True)
107 |
108 | self.assertEqual(cursor.goto_next_sibling(), True)
109 | self.assertEqual(cast(Node, cursor.node).type, "field_declaration_list")
110 | self.assertEqual(cast(Node, cursor.node).is_named, True)
111 |
112 | self.assertEqual(cursor.goto_last_child(), True)
113 | self.assertEqual(cast(Node, cursor.node).type, "}")
114 | self.assertEqual(cast(Node, cursor.node).is_named, False)
115 | self.assertEqual(cast(Node, cursor.node).start_point, (4, 16))
116 |
117 | self.assertEqual(cursor.goto_previous_sibling(), True)
118 | self.assertEqual(cast(Node, cursor.node).type, ",")
119 | self.assertEqual(cast(Node, cursor.node).is_named, False)
120 | self.assertEqual(cast(Node, cursor.node).start_point, (3, 32))
121 |
122 | self.assertEqual(cursor.goto_previous_sibling(), True)
123 | self.assertEqual(cast(Node, cursor.node).type, "field_declaration")
124 | self.assertEqual(cast(Node, cursor.node).is_named, True)
125 | self.assertEqual(cast(Node, cursor.node).start_point, (3, 20))
126 |
127 | self.assertEqual(cursor.goto_previous_sibling(), True)
128 | self.assertEqual(cast(Node, cursor.node).type, ",")
129 | self.assertEqual(cast(Node, cursor.node).is_named, False)
130 | self.assertEqual(cast(Node, cursor.node).start_point, (2, 24))
131 |
132 | self.assertEqual(cursor.goto_previous_sibling(), True)
133 | self.assertEqual(cast(Node, cursor.node).type, "field_declaration")
134 | self.assertEqual(cast(Node, cursor.node).is_named, True)
135 | self.assertEqual(cast(Node, cursor.node).start_point, (2, 20))
136 |
137 | self.assertEqual(cursor.goto_previous_sibling(), True)
138 | self.assertEqual(cast(Node, cursor.node).type, "{")
139 | self.assertEqual(cast(Node, cursor.node).is_named, False)
140 | self.assertEqual(cast(Node, cursor.node).start_point, (1, 29))
141 |
142 | copy = tree.walk()
143 | copy.reset_to(cursor)
144 |
145 | self.assertEqual(cast(Node, copy.node).type, "{")
146 | self.assertEqual(cast(Node, copy.node).is_named, False)
147 |
148 | self.assertEqual(copy.goto_parent(), True)
149 | self.assertEqual(cast(Node, copy.node).type, "field_declaration_list")
150 | self.assertEqual(cast(Node, copy.node).is_named, True)
151 |
152 | self.assertEqual(copy.goto_parent(), True)
153 | self.assertEqual(cast(Node, copy.node).type, "struct_item")
154 |
--------------------------------------------------------------------------------
/tree_sitter/__init__.py:
--------------------------------------------------------------------------------
1 | """Python bindings to the Tree-sitter parsing library."""
2 |
3 | from typing import Protocol as _Protocol
4 |
5 | from ._binding import (
6 | Language,
7 | LogType,
8 | LookaheadIterator,
9 | Node,
10 | Parser,
11 | Point,
12 | Query,
13 | QueryCursor,
14 | QueryError,
15 | Range,
16 | Tree,
17 | TreeCursor,
18 | LANGUAGE_VERSION,
19 | MIN_COMPATIBLE_LANGUAGE_VERSION,
20 | )
21 |
22 | LogType.__doc__ = "The type of a log message."  # docstrings for the imported binding types are assigned here, at import time
23 |
24 | Point.__doc__ = "A position in a multi-line text document, in terms of rows and columns."
25 | Point.row.__doc__ = "The zero-based row of the document."
26 | Point.column.__doc__ = "The zero-based column of the document."
27 |
28 |
29 | class QueryPredicate(_Protocol):
30 | """A custom query predicate that runs on a pattern."""
31 | def __call__(self, predicate, args, pattern_index, captures):
32 | """
33 | Parameters
34 | ----------
35 |
36 | predicate : str
37 | The name of the predicate.
38 | args : list[tuple[str, typing.Literal['capture', 'string']]]
39 | The arguments to the predicate.
40 | pattern_index : int
41 | The index of the pattern within the query.
42 | captures : dict[str, list[Node]]
43 | The captures contained in the pattern.
44 |
45 | Returns
46 | -------
47 | ``True`` if the predicate matches, ``False`` otherwise.
48 |
49 | Tip
50 | ---
51 | You don't need to create an actual class, just a function with this signature.
52 | """
53 |
54 |
55 | __all__ = [  # public names: everything imported from ._binding above, plus QueryPredicate defined in this module
56 | "Language",
57 | "LogType",
58 | "LookaheadIterator",
59 | "Node",
60 | "Parser",
61 | "Point",
62 | "Query",
63 | "QueryCursor",
64 | "QueryError",
65 | "QueryPredicate",
66 | "Range",
67 | "Tree",
68 | "TreeCursor",
69 | "LANGUAGE_VERSION",
70 | "MIN_COMPATIBLE_LANGUAGE_VERSION",
71 | ]
72 |
--------------------------------------------------------------------------------
/tree_sitter/__init__.pyi:
--------------------------------------------------------------------------------
1 | from enum import IntEnum
2 | from collections.abc import ByteString, Callable, Iterator, Sequence
3 | from typing import Annotated, Any, Final, Literal, NamedTuple, Protocol, Self, final, overload
4 | from typing_extensions import deprecated
5 |
6 | class _SupportsFileno(Protocol):
7 | def fileno(self) -> int: ...
8 |
9 | class Point(NamedTuple):
10 | row: int
11 | column: int
12 |
13 | class LogType(IntEnum):
14 | PARSE: int
15 | LEX: int
16 |
17 | @final
18 | class Language:
19 | @overload
20 | @deprecated("int argument support is deprecated")
21 | def __init__(self, ptr: Annotated[int, "TSLanguage *"], /) -> None: ...
22 | @overload
23 | def __init__(self, ptr: Annotated[object, "TSLanguage *"], /) -> None: ...
24 | @property
25 | def name(self) -> str | None: ...
26 | @property
27 | def abi_version(self) -> int: ...
28 | @property
29 | def semantic_version(self) -> tuple[int, int, int] | None: ...
30 | @deprecated("Use abi_version instead")
31 | @property
32 | def version(self) -> int: ...
33 | @property
34 | def node_kind_count(self) -> int: ...
35 | @property
36 | def parse_state_count(self) -> int: ...
37 | @property
38 | def field_count(self) -> int: ...
39 | @property
40 | def supertypes(self) -> tuple[int, ...]: ...
41 | def subtypes(self, supertype: int, /) -> tuple[int, ...]: ...
42 | def node_kind_for_id(self, id: int, /) -> str | None: ...
43 | def id_for_node_kind(self, kind: str, named: bool, /) -> int | None: ...
44 | def node_kind_is_named(self, id: int, /) -> bool: ...
45 | def node_kind_is_visible(self, id: int, /) -> bool: ...
46 | def node_kind_is_supertype(self, id: int, /) -> bool: ...
47 | def field_name_for_id(self, field_id: int, /) -> str | None: ...
48 | def field_id_for_name(self, name: str, /) -> int | None: ...
49 | def next_state(self, state: int, id: int, /) -> int: ...
50 | def lookahead_iterator(self, state: int, /) -> LookaheadIterator | None: ...
51 | @deprecated("Use the Query() constructor instead")
52 | def query(self, source: str, /) -> Query: ...
53 | def copy(self) -> Language: ...
54 | def __repr__(self) -> str: ...
55 | def __eq__(self, other: Any, /) -> bool: ...
56 | def __ne__(self, other: Any, /) -> bool: ...
57 | def __hash__(self) -> int: ...
58 | def __copy__(self) -> Language: ...
59 |
60 | @final
61 | class Node:
62 | @property
63 | def id(self) -> int: ...
64 | @property
65 | def kind_id(self) -> int: ...
66 | @property
67 | def grammar_id(self) -> int: ...
68 | @property
69 | def grammar_name(self) -> str: ...
70 | @property
71 | def type(self) -> str: ...
72 | @property
73 | def is_named(self) -> bool: ...
74 | @property
75 | def is_extra(self) -> bool: ...
76 | @property
77 | def has_changes(self) -> bool: ...
78 | @property
79 | def has_error(self) -> bool: ...
80 | @property
81 | def is_error(self) -> bool: ...
82 | @property
83 | def parse_state(self) -> int: ...
84 | @property
85 | def next_parse_state(self) -> int: ...
86 | @property
87 | def is_missing(self) -> bool: ...
88 | @property
89 | def start_byte(self) -> int: ...
90 | @property
91 | def end_byte(self) -> int: ...
92 | @property
93 | def byte_range(self) -> tuple[int, int]: ...
94 | @property
95 | def range(self) -> Range: ...
96 | @property
97 | def start_point(self) -> Point: ...
98 | @property
99 | def end_point(self) -> Point: ...
100 | @property
101 | def children(self) -> list[Node]: ...
102 | @property
103 | def child_count(self) -> int: ...
104 | @property
105 | def named_children(self) -> list[Node]: ...
106 | @property
107 | def named_child_count(self) -> int: ...
108 | @property
109 | def parent(self) -> Node | None: ...
110 | @property
111 | def next_sibling(self) -> Node | None: ...
112 | @property
113 | def prev_sibling(self) -> Node | None: ...
114 | @property
115 | def next_named_sibling(self) -> Node | None: ...
116 | @property
117 | def prev_named_sibling(self) -> Node | None: ...
118 | @property
119 | def descendant_count(self) -> int: ...
120 | @property
121 | def text(self) -> bytes | None: ...
122 | def walk(self) -> TreeCursor: ...
123 | def edit(
124 | self,
125 | start_byte: int,
126 | old_end_byte: int,
127 | new_end_byte: int,
128 | start_point: Point | tuple[int, int],
129 | old_end_point: Point | tuple[int, int],
130 | new_end_point: Point | tuple[int, int],
131 | ) -> None: ...
132 | def child(self, index: int, /) -> Node | None: ...
133 | def named_child(self, index: int, /) -> Node | None: ...
134 | def first_child_for_byte(self, byte: int, /) -> Node | None: ...
135 | def first_named_child_for_byte(self, byte: int, /) -> Node | None: ...
136 | def child_by_field_id(self, id: int, /) -> Node | None: ...
137 | def child_by_field_name(self, name: str, /) -> Node | None: ...
138 | def child_with_descendant(self, descendant: Node, /) -> Node | None: ...
139 | def children_by_field_id(self, id: int, /) -> list[Node]: ...
140 | def children_by_field_name(self, name: str, /) -> list[Node]: ...
141 | def field_name_for_child(self, child_index: int, /) -> str | None: ...
142 | def field_name_for_named_child(self, child_index: int, /) -> str | None: ...
143 | def descendant_for_byte_range(
144 | self,
145 | start_byte: int,
146 | end_byte: int,
147 | /,
148 | ) -> Node | None: ...
149 | def named_descendant_for_byte_range(
150 | self,
151 | start_byte: int,
152 | end_byte: int,
153 | /,
154 | ) -> Node | None: ...
155 | def descendant_for_point_range(
156 | self,
157 | start_point: Point | tuple[int, int],
158 | end_point: Point | tuple[int, int],
159 | /,
160 | ) -> Node | None: ...
161 | def named_descendant_for_point_range(
162 | self,
163 | start_point: Point | tuple[int, int],
164 | end_point: Point | tuple[int, int],
165 | /,
166 | ) -> Node | None: ...
167 | def __repr__(self) -> str: ...
168 | def __str__(self) -> str: ...
169 | def __eq__(self, other: Any, /) -> bool: ...
170 | def __ne__(self, other: Any, /) -> bool: ...
171 | def __hash__(self) -> int: ...
172 |
# Type stub for ``Tree``; ``@final`` tells type checkers it must not be subclassed.
@final
class Tree:
    # Read-only properties.
    @property
    def root_node(self) -> Node: ...
    @property
    def included_ranges(self) -> list[Range]: ...
    @property
    def language(self) -> Language: ...
    # Both arguments are positional-only (``/``); the extent may be given
    # either as a ``Point`` or as a plain ``(int, int)`` tuple.
    def root_node_with_offset(
        self,
        offset_bytes: int,
        offset_extent: Point | tuple[int, int],
        /,
    ) -> Node | None: ...
    def copy(self) -> Tree: ...
    # No ``/`` marker here, so these parameters may also be passed by keyword.
    def edit(
        self,
        start_byte: int,
        old_end_byte: int,
        new_end_byte: int,
        start_point: Point | tuple[int, int],
        old_end_point: Point | tuple[int, int],
        new_end_point: Point | tuple[int, int],
    ) -> None: ...
    def walk(self) -> TreeCursor: ...
    def changed_ranges(self, new_tree: Tree, /) -> list[Range]: ...
    # NOTE(review): ``_SupportsFileno`` is not defined in this part of the stub;
    # presumably a protocol declared earlier in the file.
    def print_dot_graph(self, file: _SupportsFileno, /) -> None: ...
    # Declared with the same return type as ``copy()``.
    def __copy__(self) -> Tree: ...
201 |
# Type stub for ``TreeCursor``; ``@final`` tells type checkers it must not be subclassed.
@final
class TreeCursor:
    # Read-only properties; ``node``, ``field_id`` and ``field_name`` may be ``None``.
    @property
    def node(self) -> Node | None: ...
    @property
    def field_id(self) -> int | None: ...
    @property
    def field_name(self) -> str | None: ...
    @property
    def depth(self) -> int: ...
    @property
    def descendant_index(self) -> int: ...
    def copy(self) -> TreeCursor: ...
    def reset(self, node: Node, /) -> None: ...
    def reset_to(self, cursor: TreeCursor, /) -> None: ...
    # The ``goto_*`` methods below that return ``bool`` take no arguments.
    def goto_first_child(self) -> bool: ...
    def goto_last_child(self) -> bool: ...
    def goto_parent(self) -> bool: ...
    def goto_next_sibling(self) -> bool: ...
    def goto_previous_sibling(self) -> bool: ...
    def goto_descendant(self, index: int, /) -> None: ...
    # These two return ``int | None`` rather than ``bool``.
    def goto_first_child_for_byte(self, byte: int, /) -> int | None: ...
    def goto_first_child_for_point(self, point: Point | tuple[int, int], /) -> int | None: ...
    # Declared with the same return type as ``copy()``.
    def __copy__(self) -> TreeCursor: ...
226 |
# Type stub for ``Parser``; ``@final`` tells type checkers it must not be subclassed.
@final
class Parser:
    # Two constructor overloads: the second one additionally accepts
    # ``timeout_micros`` and is flagged as deprecated for type checkers.
    @overload
    def __init__(
        self,
        language: Language | None = None,
        *,
        included_ranges: Sequence[Range] | None = None,
        logger: Callable[[LogType, str], None] | None = None,
    ) -> None: ...
    @deprecated("timeout_micros is deprecated")
    @overload
    def __init__(
        self,
        language: Language | None = None,
        *,
        included_ranges: Sequence[Range] | None = None,
        timeout_micros: int | None = None,
        logger: Callable[[LogType, str], None] | None = None,
    ) -> None: ...
    # ``language``, ``included_ranges``, ``timeout_micros`` and ``logger`` are
    # each declared with a getter, a setter and a deleter.
    @property
    def language(self) -> Language | None: ...
    @language.setter
    def language(self, language: Language) -> None: ...
    @language.deleter
    def language(self) -> None: ...
    @property
    def included_ranges(self) -> list[Range]: ...
    @included_ranges.setter
    def included_ranges(self, ranges: Sequence[Range]) -> None: ...
    @included_ranges.deleter
    def included_ranges(self) -> None: ...
    # All three ``timeout_micros`` accessors carry the same deprecation message.
    @deprecated("Use the progress_callback in parse()")
    @property
    def timeout_micros(self) -> int: ...
    @deprecated("Use the progress_callback in parse()")
    @timeout_micros.setter
    def timeout_micros(self, timeout: int) -> None: ...
    @deprecated("Use the progress_callback in parse()")
    @timeout_micros.deleter
    def timeout_micros(self) -> None: ...
    @property
    def logger(self) -> Callable[[LogType, str], None] | None: ...
    @logger.setter
    def logger(self, logger: Callable[[LogType, str], None]) -> None: ...
    @logger.deleter
    def logger(self) -> None: ...
    # ``parse`` overload 1: the first (positional-only) argument is a bytes-like source.
    @overload
    def parse(
        self,
        source: ByteString,
        /,
        old_tree: Tree | None = None,
        encoding: Literal["utf8", "utf16", "utf16le", "utf16be"] = "utf8",
    ) -> Tree: ...
    # ``parse`` overload 2: the first argument is a callback taking ``(int, Point)``
    # and returning ``bytes | None``; only this overload declares ``progress_callback``.
    @overload
    def parse(
        self,
        read_callback: Callable[[int, Point], bytes | None],
        /,
        old_tree: Tree | None = None,
        encoding: Literal["utf8", "utf16", "utf16le", "utf16be"] = "utf8",
        progress_callback: Callable[[int, bool], bool] | None = None,
    ) -> Tree: ...
    def reset(self) -> None: ...
    # ``file`` may be ``None`` here, unlike ``Tree.print_dot_graph``.
    def print_dot_graphs(self, file: _SupportsFileno | None, /) -> None: ...
293 |
# Exception type exported by the package; it derives from ``ValueError``,
# so ``except ValueError`` also catches it.
class QueryError(ValueError): ...
295 |
# Structural (``Protocol``) type: any callable with this signature is accepted
# where a ``QueryPredicate`` is expected.
class QueryPredicate(Protocol):
    def __call__(
        self,
        predicate: str,
        # Each entry pairs a string with a tag that is either "capture" or "string".
        args: list[tuple[str, Literal["capture", "string"]]],
        pattern_index: int,
        captures: dict[str, list[Node]],
    ) -> bool: ...
304 |
# Type stub for ``Query``; ``@final`` tells type checkers it must not be subclassed.
@final
class Query:
    # Construction is declared through ``__new__`` (not ``__init__``);
    # both arguments are positional-only.
    def __new__(cls, language: Language, source: str, /) -> Self: ...
    # The three ``*_count`` members are declared as methods, not properties.
    def pattern_count(self) -> int: ...
    def capture_count(self) -> int: ...
    def string_count(self) -> int: ...
    # Everything below takes positional-only integer indexes or names.
    def start_byte_for_pattern(self, index: int, /) -> int: ...
    def end_byte_for_pattern(self, index: int, /) -> int: ...
    def is_pattern_rooted(self, index: int, /) -> bool: ...
    def is_pattern_non_local(self, index: int, /) -> bool: ...
    def is_pattern_guaranteed_at_step(self, index: int, /) -> bool: ...
    def capture_name(self, index: int, /) -> str: ...
    # The result is one of the four literal strings "", "?", "*" or "+".
    def capture_quantifier(
        self,
        pattern_index: int,
        capture_index: int,
        /
    ) -> Literal["", "?", "*", "+"]: ...
    def string_value(self, index: int, /) -> str: ...
    def disable_capture(self, name: str, /) -> None: ...
    def disable_pattern(self, index: int, /) -> None: ...
    def pattern_settings(self, index: int, /) -> dict[str, str | None]: ...
    def pattern_assertions(self, index: int, /) -> dict[str, tuple[str | None, bool]]: ...
328 |
# Type stub for ``QueryCursor``; ``@final`` tells type checkers it must not be subclassed.
@final
class QueryCursor:
    # Two constructor overloads: the second one additionally accepts
    # ``timeout_micros`` and is flagged as deprecated for type checkers.
    @overload
    def __init__(self, query: Query, *, match_limit: int = 0xFFFFFFFF) -> None: ...
    @deprecated("timeout_micros is deprecated")
    @overload
    def __init__(
        self,
        query: Query,
        *,
        match_limit: int = 0xFFFFFFFF,
        timeout_micros: int = 0
    ) -> None: ...
    # ``match_limit`` is declared with a getter, a setter and a deleter.
    @property
    def match_limit(self) -> int: ...
    @match_limit.setter
    def match_limit(self, limit: int) -> None: ...
    @match_limit.deleter
    def match_limit(self) -> None: ...
    # ``timeout_micros`` is deprecated and declares a getter and a setter only.
    @deprecated("Use the progress_callback in matches() or captures()")
    @property
    def timeout_micros(self) -> int: ...
    @deprecated("Use the progress_callback in matches() or captures()")
    @timeout_micros.setter
    def timeout_micros(self, timeout: int) -> None: ...
    @property
    def did_exceed_match_limit(self) -> bool: ...
    def set_max_start_depth(self, depth: int, /) -> None: ...
    def set_byte_range(self, start: int, end: int, /) -> None: ...
    def set_point_range(
        self,
        start: Point | tuple[int, int],
        end: Point | tuple[int, int],
        /,
    ) -> None: ...
    # All parameters are positional-only; the result maps a string key to a list of nodes.
    def captures(
        self,
        node: Node,
        predicate: QueryPredicate | None = None,
        progress_callback: Callable[[int], bool] | None = None,
        /,
    ) -> dict[str, list[Node]]: ...
    # Same parameters as ``captures``; the result is a list of
    # ``(int, dict[str, list[Node]])`` tuples.
    def matches(
        self,
        node: Node,
        predicate: QueryPredicate | None = None,
        progress_callback: Callable[[int], bool] | None = None,
        /,
    ) -> list[tuple[int, dict[str, list[Node]]]]: ...
378 |
# Type stub for ``LookaheadIterator``: an iterator whose items are
# ``(int, str)`` tuples. ``@final`` tells type checkers it must not be subclassed.
@final
class LookaheadIterator(Iterator[tuple[int, str]]):
    # Read-only properties.
    @property
    def language(self) -> Language: ...
    @property
    def current_symbol(self) -> int: ...
    @property
    def current_symbol_name(self) -> str: ...
    # ``state`` is positional-only; ``language`` may be passed by keyword.
    def reset(self, state: int, /, language: Language | None = None) -> bool: ...
    def names(self) -> list[str]: ...
    def symbols(self) -> list[int]: ...
    def __next__(self) -> tuple[int, str]: ...
391 |
# Type stub for ``Range``; ``@final`` tells type checkers it must not be subclassed.
@final
class Range:
    # The two points come first, then the two byte offsets; each point may be
    # a ``Point`` or a plain ``(int, int)`` tuple.
    def __init__(
        self,
        start_point: Point | tuple[int, int],
        end_point: Point | tuple[int, int],
        start_byte: int,
        end_byte: int,
    ) -> None: ...
    # Read-only properties; the point properties are always typed as ``Point``.
    @property
    def start_point(self) -> Point: ...
    @property
    def end_point(self) -> Point: ...
    @property
    def start_byte(self) -> int: ...
    @property
    def end_byte(self) -> int: ...
    # Equality, repr and hashing are all declared.
    def __eq__(self, other: Any, /) -> bool: ...
    def __ne__(self, other: Any, /) -> bool: ...
    def __repr__(self) -> str: ...
    def __hash__(self) -> int: ...
413 |
# Module-level integer constants; ``Final`` tells type checkers they must not be reassigned.
LANGUAGE_VERSION: Final[int]

MIN_COMPATIBLE_LANGUAGE_VERSION: Final[int]
417 |
--------------------------------------------------------------------------------
/tree_sitter/binding/language.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) {
4 | PyObject *language;
5 | if (!PyArg_ParseTuple(args, "O:__init__", &language)) {
6 | return -1;
7 | }
8 |
9 | if (PyCapsule_CheckExact(language)) {
10 | self->language = PyCapsule_GetPointer(language, "tree_sitter.Language");
11 | } else {
12 | Py_uintptr_t language_id = PyLong_AsSize_t(language);
13 | if (language_id == 0 || (language_id % sizeof(TSLanguage *)) != 0) {
14 | if (!PyErr_Occurred()) {
15 | PyErr_SetString(PyExc_ValueError, "invalid language ID");
16 | }
17 | return -1;
18 | }
19 | if (DEPRECATE("int argument support is deprecated") < 0) {
20 | return -1;
21 | }
22 | self->language = PyLong_AsVoidPtr(language);
23 | }
24 |
25 | if (self->language == NULL) {
26 | return -1;
27 | }
28 | self->abi_version = ts_language_abi_version(self->language);
29 | self->name = ts_language_name(self->language);
30 | return 0;
31 | }
32 |
33 | void language_dealloc(Language *self) {
34 | ts_language_delete(self->language);
35 | Py_TYPE(self)->tp_free(self);
36 | }
37 |
38 | PyObject *language_repr(Language *self) {
39 | if (self->name == NULL) {
40 | return PyUnicode_FromFormat("",
41 | (Py_uintptr_t)self->language, self->abi_version);
42 | }
43 | return PyUnicode_FromFormat("",
44 | (Py_uintptr_t)self->language, self->abi_version, self->name);
45 | }
46 |
47 | Py_hash_t language_hash(Language *self) { return (Py_hash_t)self->language; }
48 |
49 | PyObject *language_compare(Language *self, PyObject *other, int op) {
50 | if ((op != Py_EQ && op != Py_NE) || !IS_INSTANCE(other, language_type)) {
51 | Py_RETURN_NOTIMPLEMENTED;
52 | }
53 |
54 | Language *lang = (Language *)other;
55 | bool result = (Py_uintptr_t)self->language == (Py_uintptr_t)lang->language;
56 | return PyBool_FromLong(result ^ (op == Py_NE));
57 | }
58 |
59 | PyObject *language_get_name(Language *self, void *Py_UNUSED(payload)) {
60 | if (self->name == NULL) {
61 | Py_RETURN_NONE;
62 | }
63 | return PyUnicode_FromString(self->name);
64 | }
65 |
66 | PyObject *language_get_version(Language *self, void *Py_UNUSED(payload)) {
67 | if (REPLACE("version", "abi_version") < 0) {
68 | return NULL;
69 | }
70 | return PyLong_FromUnsignedLong(self->abi_version);
71 | }
72 |
73 | PyObject *language_get_abi_version(Language *self, void *Py_UNUSED(payload)) {
74 | return PyLong_FromUnsignedLong(self->abi_version);
75 | }
76 |
77 | PyObject *language_get_semantic_version(Language *self, void *Py_UNUSED(payload)) {
78 | const TSLanguageMetadata *metadata = ts_language_metadata(self->language);
79 | if (metadata == NULL) {
80 | Py_RETURN_NONE;
81 | }
82 | PyObject *major = PyLong_FromUnsignedLong(metadata->major_version),
83 | *minor = PyLong_FromUnsignedLong(metadata->minor_version),
84 | *patch = PyLong_FromUnsignedLong(metadata->patch_version);
85 | PyObject *result = PyTuple_Pack(3, major, minor, patch);
86 | Py_XDECREF(major);
87 | Py_XDECREF(minor);
88 | Py_XDECREF(patch);
89 | return result;
90 | }
91 |
92 | PyObject *language_get_node_kind_count(Language *self, void *Py_UNUSED(payload)) {
93 | return PyLong_FromUnsignedLong(ts_language_symbol_count(self->language));
94 | }
95 |
96 | PyObject *language_get_parse_state_count(Language *self, void *Py_UNUSED(payload)) {
97 | return PyLong_FromUnsignedLong(ts_language_state_count(self->language));
98 | }
99 |
100 | PyObject *language_get_field_count(Language *self, void *Py_UNUSED(payload)) {
101 | return PyLong_FromUnsignedLong(ts_language_field_count(self->language));
102 | }
103 |
104 | PyObject *language_get_supertypes(Language *self, void *Py_UNUSED(payload)) {
105 | uint32_t length;
106 | const TSSymbol *symbols = ts_language_supertypes(self->language, &length);
107 | if (length == 0) {
108 | return PyTuple_New(0);
109 | }
110 | PyObject *result = PyTuple_New(length);
111 | for (uint32_t i = 0; i < length; ++i) {
112 | PyTuple_SetItem(result, i, PyLong_FromUnsignedLong(symbols[i]));
113 | }
114 | return result;
115 | }
116 |
117 | PyObject *language_subtypes(Language *self, PyObject *args) {
118 | TSSymbol supertype;
119 | if (!PyArg_ParseTuple(args, "H:subtypes", &supertype)) {
120 | return NULL;
121 | }
122 | uint32_t length;
123 | const TSSymbol *symbols = ts_language_subtypes(self->language, supertype, &length);
124 | if (length == 0) {
125 | return PyTuple_New(0);
126 | }
127 | PyObject *result = PyTuple_New(length);
128 | for (uint32_t i = 0; i < length; ++i) {
129 | PyTuple_SetItem(result, i, PyLong_FromUnsignedLong(symbols[i]));
130 | }
131 | return result;
132 | }
133 |
134 | PyObject *language_node_kind_for_id(Language *self, PyObject *args) {
135 | TSSymbol symbol;
136 | if (!PyArg_ParseTuple(args, "H:node_kind_for_id", &symbol)) {
137 | return NULL;
138 | }
139 | const char *name = ts_language_symbol_name(self->language, symbol);
140 | if (name == NULL) {
141 | Py_RETURN_NONE;
142 | }
143 | return PyUnicode_FromString(name);
144 | }
145 |
146 | PyObject *language_id_for_node_kind(Language *self, PyObject *args) {
147 | char *kind;
148 | Py_ssize_t length;
149 | int named;
150 | if (!PyArg_ParseTuple(args, "s#p:id_for_node_kind", &kind, &length, &named)) {
151 | return NULL;
152 | }
153 | TSSymbol symbol = ts_language_symbol_for_name(self->language, kind, length, named);
154 | if (symbol == 0) {
155 | Py_RETURN_NONE;
156 | }
157 | return PyLong_FromUnsignedLong(symbol);
158 | }
159 |
160 | PyObject *language_node_kind_is_named(Language *self, PyObject *args) {
161 | TSSymbol symbol;
162 | if (!PyArg_ParseTuple(args, "H:node_kind_is_named", &symbol)) {
163 | return NULL;
164 | }
165 | TSSymbolType symbol_type = ts_language_symbol_type(self->language, symbol);
166 | return PyBool_FromLong(symbol_type == TSSymbolTypeRegular);
167 | }
168 |
169 | PyObject *language_node_kind_is_visible(Language *self, PyObject *args) {
170 | TSSymbol symbol;
171 | if (!PyArg_ParseTuple(args, "H:node_kind_is_visible", &symbol)) {
172 | return NULL;
173 | }
174 | TSSymbolType symbol_type = ts_language_symbol_type(self->language, symbol);
175 | return PyBool_FromLong(symbol_type <= TSSymbolTypeAnonymous);
176 | }
177 |
178 | PyObject *language_node_kind_is_supertype(Language *self, PyObject *args) {
179 | TSSymbol symbol;
180 | if (!PyArg_ParseTuple(args, "H:node_kind_is_supertype", &symbol)) {
181 | return NULL;
182 | }
183 | TSSymbolType symbol_type = ts_language_symbol_type(self->language, symbol);
184 | return PyBool_FromLong(symbol_type <= TSSymbolTypeSupertype);
185 | }
186 |
187 | PyObject *language_field_name_for_id(Language *self, PyObject *args) {
188 | uint16_t field_id;
189 | if (!PyArg_ParseTuple(args, "H:field_name_for_id", &field_id)) {
190 | return NULL;
191 | }
192 | const char *field_name = ts_language_field_name_for_id(self->language, field_id);
193 | if (field_name == NULL) {
194 | Py_RETURN_NONE;
195 | }
196 | return PyUnicode_FromString(field_name);
197 | }
198 |
199 | PyObject *language_field_id_for_name(Language *self, PyObject *args) {
200 | char *field_name;
201 | Py_ssize_t length;
202 | if (!PyArg_ParseTuple(args, "s#:field_id_for_name", &field_name, &length)) {
203 | return NULL;
204 | }
205 | TSFieldId field_id = ts_language_field_id_for_name(self->language, field_name, length);
206 | if (field_id == 0) {
207 | Py_RETURN_NONE;
208 | }
209 | return PyLong_FromUnsignedLong(field_id);
210 | }
211 |
212 | PyObject *language_next_state(Language *self, PyObject *args) {
213 | uint16_t state_id, symbol;
214 | if (!PyArg_ParseTuple(args, "HH:next_state", &state_id, &symbol)) {
215 | return NULL;
216 | }
217 | TSStateId state = ts_language_next_state(self->language, state_id, symbol);
218 | return PyLong_FromUnsignedLong(state);
219 | }
220 |
221 | PyObject *language_lookahead_iterator(Language *self, PyObject *args) {
222 | uint16_t state_id;
223 | if (!PyArg_ParseTuple(args, "H:lookahead_iterator", &state_id)) {
224 | return NULL;
225 | }
226 | TSLookaheadIterator *lookahead_iterator = ts_lookahead_iterator_new(self->language, state_id);
227 | if (lookahead_iterator == NULL) {
228 | Py_RETURN_NONE;
229 | }
230 | ModuleState *state = GET_MODULE_STATE(self);
231 | LookaheadIterator *iter = PyObject_New(LookaheadIterator, state->lookahead_iterator_type);
232 | if (iter == NULL) {
233 | return NULL;
234 | }
235 | iter->language = Py_NewRef(self);
236 | iter->lookahead_iterator = lookahead_iterator;
237 | return PyObject_Init((PyObject *)iter, state->lookahead_iterator_type);
238 | }
239 |
240 | PyObject *language_query(Language *self, PyObject *args) {
241 | ModuleState *state = GET_MODULE_STATE(self);
242 | char *source;
243 | Py_ssize_t length;
244 | if (!PyArg_ParseTuple(args, "s#:query", &source, &length)) {
245 | return NULL;
246 | }
247 | if (REPLACE("query()", "the Query() constructor") < 0) {
248 | return NULL;
249 | }
250 | return PyObject_CallFunction((PyObject *)state->query_type, "Os#", self, source, length);
251 | }
252 |
253 | PyObject *language_copy(Language *self, PyObject *Py_UNUSED(args)) {
254 | ModuleState *state = GET_MODULE_STATE(self);
255 | Language *copied = PyObject_New(Language, state->language_type);
256 | if (copied == NULL) {
257 | return NULL;
258 | }
259 | copied->language = (TSLanguage *)ts_language_copy(self->language);
260 | return PyObject_Init((PyObject *)copied, state->language_type);
261 | }
262 |
/*
 * Docstrings for the Language methods.
 *
 * Each string starts with a "name(args)\n--\n\n" signature line followed by
 * the description; DOC_TIP and DOC_EXAMPLES are string macros defined
 * outside this file.
 */
PyDoc_STRVAR(language_subtypes_doc, "subtypes(self, supertype, /)\n--\n\n"
                                    "Get all subtype symbol IDs for a given supertype symbol.");
PyDoc_STRVAR(language_node_kind_for_id_doc,
             "node_kind_for_id(self, id, /)\n--\n\n"
             "Get the name of the node kind for the given numerical id.");
PyDoc_STRVAR(language_id_for_node_kind_doc, "id_for_node_kind(self, kind, named, /)\n--\n\n"
                                            "Get the numerical id for the given node kind.");
PyDoc_STRVAR(language_node_kind_is_named_doc, "node_kind_is_named(self, id, /)\n--\n\n"
                                              "Check if the node type for the given numerical id "
                                              "is named (as opposed to an anonymous node type).");
PyDoc_STRVAR(language_node_kind_is_visible_doc,
             "node_kind_is_visible(self, id, /)\n--\n\n"
             "Check if the node type for the given numerical id "
             "is visible (as opposed to an auxiliary node type).");
PyDoc_STRVAR(language_node_kind_is_supertype_doc,
             "node_kind_is_supertype(self, id, /)\n--\n\n"
             "Check if the node type for the given numerical id is a supertype.\n\nSupertype "
             "nodes represent abstract categories of syntax nodes (e.g. \"expression\").");
PyDoc_STRVAR(language_field_name_for_id_doc, "field_name_for_id(self, field_id, /)\n--\n\n"
                                             "Get the field name for the given numerical id.");
PyDoc_STRVAR(language_field_id_for_name_doc, "field_id_for_name(self, name, /)\n--\n\n"
                                             "Get the numerical id for the given field name.");
PyDoc_STRVAR(language_next_state_doc,
             "next_state(self, state, id, /)\n--\n\n"
             "Get the next parse state." DOC_TIP "Combine this with ``lookahead_iterator`` to "
             "generate completion suggestions or valid symbols in error nodes." DOC_EXAMPLES
             ">>> state = language.next_state(node.parse_state, node.grammar_id)");
PyDoc_STRVAR(language_lookahead_iterator_doc,
             "lookahead_iterator(self, state, /)\n--\n\n"
             "Create a new :class:`LookaheadIterator` for this language and parse state.");
PyDoc_STRVAR(
    language_query_doc,
    "query(self, source, /)\n--\n\n"
    "Create a new :class:`Query` from a string containing one or more S-expression patterns.");
PyDoc_STRVAR(language_copy_doc, "copy(self, /)\n--\n\n"
                                "Create a copy of the language.");
/* copy() and __copy__() share one implementation but have separate docstrings. */
PyDoc_STRVAR(language_copy2_doc, "__copy__(self, /)\n--\n\n"
                                 "Use :func:`copy.copy` to create a copy of the language.");
301 |
302 | static PyMethodDef language_methods[] = {
303 | {
304 | .ml_name = "subtypes",
305 | .ml_meth = (PyCFunction)language_subtypes,
306 | .ml_flags = METH_VARARGS,
307 | .ml_doc = language_subtypes_doc,
308 | },
309 | {
310 | .ml_name = "node_kind_for_id",
311 | .ml_meth = (PyCFunction)language_node_kind_for_id,
312 | .ml_flags = METH_VARARGS,
313 | .ml_doc = language_node_kind_for_id_doc,
314 | },
315 | {
316 | .ml_name = "id_for_node_kind",
317 | .ml_meth = (PyCFunction)language_id_for_node_kind,
318 | .ml_flags = METH_VARARGS,
319 | .ml_doc = language_id_for_node_kind_doc,
320 | },
321 | {
322 | .ml_name = "node_kind_is_named",
323 | .ml_meth = (PyCFunction)language_node_kind_is_named,
324 | .ml_flags = METH_VARARGS,
325 | .ml_doc = language_node_kind_is_named_doc,
326 | },
327 | {
328 | .ml_name = "node_kind_is_visible",
329 | .ml_meth = (PyCFunction)language_node_kind_is_visible,
330 | .ml_flags = METH_VARARGS,
331 | .ml_doc = language_node_kind_is_visible_doc,
332 | },
333 | {
334 | .ml_name = "node_kind_is_supertype",
335 | .ml_meth = (PyCFunction)language_node_kind_is_supertype,
336 | .ml_flags = METH_VARARGS,
337 | .ml_doc = language_node_kind_is_supertype_doc,
338 | },
339 | {
340 | .ml_name = "field_name_for_id",
341 | .ml_meth = (PyCFunction)language_field_name_for_id,
342 | .ml_flags = METH_VARARGS,
343 | .ml_doc = language_field_name_for_id_doc,
344 | },
345 | {
346 | .ml_name = "field_id_for_name",
347 | .ml_meth = (PyCFunction)language_field_id_for_name,
348 | .ml_flags = METH_VARARGS,
349 | .ml_doc = language_field_id_for_name_doc,
350 | },
351 | {
352 | .ml_name = "next_state",
353 | .ml_meth = (PyCFunction)language_next_state,
354 | .ml_flags = METH_VARARGS,
355 | .ml_doc = language_next_state_doc,
356 | },
357 | {
358 | .ml_name = "lookahead_iterator",
359 | .ml_meth = (PyCFunction)language_lookahead_iterator,
360 | .ml_flags = METH_VARARGS,
361 | .ml_doc = language_lookahead_iterator_doc,
362 | },
363 | {
364 | .ml_name = "query",
365 | .ml_meth = (PyCFunction)language_query,
366 | .ml_flags = METH_VARARGS,
367 | .ml_doc = language_query_doc,
368 | },
369 | {
370 | .ml_name = "copy",
371 | .ml_meth = (PyCFunction)language_copy,
372 | .ml_flags = METH_NOARGS,
373 | .ml_doc = language_copy_doc,
374 | },
375 | {.ml_name = "__copy__",
376 | .ml_meth = (PyCFunction)language_copy,
377 | .ml_flags = METH_NOARGS,
378 | .ml_doc = language_copy2_doc},
379 | {NULL},
380 | };
381 |
382 | static PyGetSetDef language_accessors[] = {
383 | {"name", (getter)language_get_name, NULL, PyDoc_STR("The name of the language."), NULL},
384 | {"version", (getter)language_get_version, NULL,
385 | PyDoc_STR("The ABI version number that indicates which version of "
386 | "the Tree-sitter CLI was used to generate this language."),
387 | NULL},
388 | {"abi_version", (getter)language_get_abi_version, NULL,
389 | PyDoc_STR("The ABI version number that indicates which version of "
390 | "the Tree-sitter CLI was used to generate this language."),
391 | NULL},
392 | {"semantic_version", (getter)language_get_semantic_version, NULL,
393 | PyDoc_STR("The `Semantic Version `_ of the language."), NULL},
394 | {"node_kind_count", (getter)language_get_node_kind_count, NULL,
395 | PyDoc_STR("The number of distinct node types in this language."), NULL},
396 | {"parse_state_count", (getter)language_get_parse_state_count, NULL,
397 | PyDoc_STR("The number of valid states in this language."), NULL},
398 | {"field_count", (getter)language_get_field_count, NULL,
399 | PyDoc_STR("The number of distinct field names in this language."), NULL},
400 | {"supertypes", (getter)language_get_supertypes, NULL,
401 | PyDoc_STR("The supertype symbols of the language."), NULL},
402 | {NULL},
403 | };
404 |
405 | static PyType_Slot language_type_slots[] = {
406 | {Py_tp_doc, PyDoc_STR("A class that defines how to parse a particular language.")},
407 | {Py_tp_init, language_init},
408 | {Py_tp_repr, language_repr},
409 | {Py_tp_hash, language_hash},
410 | {Py_tp_richcompare, language_compare},
411 | {Py_tp_dealloc, language_dealloc},
412 | {Py_tp_methods, language_methods},
413 | {Py_tp_getset, language_accessors},
414 | {0, NULL},
415 | };
416 |
417 | PyType_Spec language_type_spec = {
418 | .name = "tree_sitter.Language",
419 | .basicsize = sizeof(Language),
420 | .itemsize = 0,
421 | .flags = Py_TPFLAGS_DEFAULT,
422 | .slots = language_type_slots,
423 | };
424 |
--------------------------------------------------------------------------------
/tree_sitter/binding/lookahead_iterator.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | void lookahead_iterator_dealloc(LookaheadIterator *self) {
4 | if (self->lookahead_iterator) {
5 | ts_lookahead_iterator_delete(self->lookahead_iterator);
6 | }
7 | Py_XDECREF(self->language);
8 | Py_TYPE(self)->tp_free(self);
9 | }
10 |
11 | PyObject *lookahead_iterator_repr(LookaheadIterator *self) {
12 | return PyUnicode_FromFormat("", self->lookahead_iterator);
13 | }
14 |
15 | PyObject *lookahead_iterator_get_language(LookaheadIterator *self, void *Py_UNUSED(payload)) {
16 | TSLanguage *language_id =
17 | (TSLanguage *)ts_lookahead_iterator_language(self->lookahead_iterator);
18 | if (self->language == NULL || ((Language *)self->language)->language != language_id) {
19 | ModuleState *state = GET_MODULE_STATE(self);
20 | Language *language = PyObject_New(Language, state->language_type);
21 | if (language == NULL) {
22 | return NULL;
23 | }
24 | language->language = language_id;
25 | language->abi_version = ts_language_abi_version(language->language);
26 | self->language = PyObject_Init((PyObject *)language, state->language_type);
27 | }
28 | return Py_NewRef(self->language);
29 | }
30 |
31 | PyObject *lookahead_iterator_get_current_symbol(LookaheadIterator *self, void *Py_UNUSED(payload)) {
32 | TSSymbol symbol = ts_lookahead_iterator_current_symbol(self->lookahead_iterator);
33 | return PyLong_FromUnsignedLong(symbol);
34 | }
35 |
36 | PyObject *lookahead_iterator_get_current_symbol_name(LookaheadIterator *self,
37 | void *Py_UNUSED(payload)) {
38 | const char *name = ts_lookahead_iterator_current_symbol_name(self->lookahead_iterator);
39 | return PyUnicode_FromString(name);
40 | }
41 |
42 | PyObject *lookahead_iterator_reset(LookaheadIterator *self, PyObject *args, PyObject *kwargs) {
43 | uint16_t state_id;
44 | PyObject *language_obj = NULL;
45 | ModuleState *state = GET_MODULE_STATE(self);
46 | char *keywords[] = {"state", "language", NULL};
47 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "H|O!:reset", keywords, &state_id,
48 | state->language_type, &language_obj)) {
49 | return NULL;
50 | }
51 |
52 | bool result;
53 | if (language_obj == NULL) {
54 | result = ts_lookahead_iterator_reset_state(self->lookahead_iterator, state_id);
55 | } else {
56 | TSLanguage *language_id = ((Language *)language_obj)->language;
57 | result = ts_lookahead_iterator_reset(self->lookahead_iterator, language_id, state_id);
58 | }
59 | return PyBool_FromLong(result);
60 | }
61 |
62 | PyObject *lookahead_iterator_iter(LookaheadIterator *self) { return Py_NewRef(self); }
63 |
64 | PyObject *lookahead_iterator_next(LookaheadIterator *self) {
65 | if (!ts_lookahead_iterator_next(self->lookahead_iterator)) {
66 | PyErr_SetNone(PyExc_StopIteration);
67 | return NULL;
68 | }
69 | TSSymbol symbol = ts_lookahead_iterator_current_symbol(self->lookahead_iterator);
70 | const char *name = ts_lookahead_iterator_current_symbol_name(self->lookahead_iterator);
71 | PyObject *symbol_obj = PyLong_FromUnsignedLong(symbol), *name_obj = PyUnicode_FromString(name);
72 | PyObject *result = PyTuple_Pack(2, symbol_obj, name_obj);
73 | Py_XDECREF(symbol_obj);
74 | Py_XDECREF(name_obj);
75 | return result;
76 | }
77 |
78 | PyObject *lookahead_iterator_names(LookaheadIterator *self) {
79 | PyObject *result = PyList_New(0);
80 | while (ts_lookahead_iterator_next(self->lookahead_iterator)) {
81 | const char *name = ts_lookahead_iterator_current_symbol_name(self->lookahead_iterator);
82 | PyList_Append(result, PyUnicode_FromString(name));
83 | }
84 | return result;
85 | }
86 |
87 | PyObject *lookahead_iterator_symbols(LookaheadIterator *self) {
88 | PyObject *result = PyList_New(0);
89 | while (ts_lookahead_iterator_next(self->lookahead_iterator)) {
90 | TSSymbol symbol = ts_lookahead_iterator_current_symbol(self->lookahead_iterator);
91 | PyList_Append(result, PyLong_FromLong(symbol));
92 | }
93 | return result;
94 | }
95 |
/*
 * Docstrings for the LookaheadIterator methods.
 * Each string starts with a "name(args)\n--\n\n" signature line; DOC_RETURNS
 * is a string macro defined outside this file.
 */
PyDoc_STRVAR(lookahead_iterator_reset_doc,
             "reset(self, state, language=None)\n--\n\n"
             "Reset the lookahead iterator." DOC_RETURNS
             "``True`` if it was reset successfully or ``False`` if it failed.");
PyDoc_STRVAR(lookahead_iterator_names_doc, "names(self, /)\n--\n\n"
                                           "Get a list of all symbol names.");
PyDoc_STRVAR(lookahead_iterator_symbols_doc, "symbols(self, /)\n--\n\n"
                                             "Get a list of all symbol IDs.");
104 |
105 | static PyMethodDef lookahead_iterator_methods[] = {
106 | {
107 | .ml_name = "reset",
108 | .ml_meth = (PyCFunction)lookahead_iterator_reset,
109 | .ml_flags = METH_VARARGS | METH_KEYWORDS,
110 | .ml_doc = lookahead_iterator_reset_doc,
111 | },
112 | {
113 | .ml_name = "names",
114 | .ml_meth = (PyCFunction)lookahead_iterator_names,
115 | .ml_flags = METH_NOARGS,
116 | .ml_doc = lookahead_iterator_names_doc,
117 | },
118 | {
119 | .ml_name = "symbols",
120 | .ml_meth = (PyCFunction)lookahead_iterator_symbols,
121 | .ml_flags = METH_NOARGS,
122 | .ml_doc = lookahead_iterator_symbols_doc,
123 | },
124 | {NULL},
125 | };
126 |
127 | static PyGetSetDef lookahead_iterator_accessors[] = {
128 | {"language", (getter)lookahead_iterator_get_language, NULL, PyDoc_STR("The current language."),
129 | NULL},
130 | {"current_symbol", (getter)lookahead_iterator_get_current_symbol, NULL,
131 | PyDoc_STR("The current symbol ID.\n\n"
132 | "Newly created iterators will return the ``ERROR`` symbol."),
133 | NULL},
134 | {"current_symbol_name", (getter)lookahead_iterator_get_current_symbol_name, NULL,
135 | PyDoc_STR("The current symbol name."), NULL},
136 | {NULL},
137 | };
138 |
139 | static PyType_Slot lookahead_iterator_type_slots[] = {
140 | {Py_tp_doc,
141 | PyDoc_STR(
142 | "A class that is used to look up symbols valid in a specific parse state." DOC_TIP
143 | "Lookahead iterators can be useful to generate suggestions and improve syntax error "
144 | "diagnostics.\n\nTo get symbols valid in an ``ERROR`` node, use the lookahead iterator "
145 | "on its first leaf node state.\nFor ``MISSING`` nodes, a lookahead iterator created "
146 | "on the previous non-extra leaf node may be appropriate.")},
147 | {Py_tp_new, NULL},
148 | {Py_tp_dealloc, lookahead_iterator_dealloc},
149 | {Py_tp_repr, lookahead_iterator_repr},
150 | {Py_tp_iter, lookahead_iterator_iter},
151 | {Py_tp_iternext, lookahead_iterator_next},
152 | {Py_tp_methods, lookahead_iterator_methods},
153 | {Py_tp_getset, lookahead_iterator_accessors},
154 | {0, NULL},
155 | };
156 |
157 | PyType_Spec lookahead_iterator_type_spec = {
158 | .name = "tree_sitter.LookaheadIterator",
159 | .basicsize = sizeof(LookaheadIterator),
160 | .itemsize = 0,
161 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
162 | .slots = lookahead_iterator_type_slots,
163 | };
164 |
--------------------------------------------------------------------------------
/tree_sitter/binding/module.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | extern PyType_Spec language_type_spec;
4 | extern PyType_Spec lookahead_iterator_type_spec;
5 | extern PyType_Spec node_type_spec;
6 | extern PyType_Spec parser_type_spec;
7 | extern PyType_Spec query_cursor_type_spec;
8 | extern PyType_Spec query_predicate_anyof_type_spec;
9 | extern PyType_Spec query_predicate_eq_capture_type_spec;
10 | extern PyType_Spec query_predicate_eq_string_type_spec;
11 | extern PyType_Spec query_predicate_generic_type_spec;
12 | extern PyType_Spec query_predicate_match_type_spec;
13 | extern PyType_Spec query_type_spec;
14 | extern PyType_Spec range_type_spec;
15 | extern PyType_Spec tree_cursor_type_spec;
16 | extern PyType_Spec tree_type_spec;
17 |
18 | static inline PyObject *import_attribute(const char *mod, const char *attr) {
19 | PyObject *module = PyImport_ImportModule(mod);
20 | if (module == NULL) {
21 | return NULL;
22 | }
23 | PyObject *import = PyObject_GetAttrString(module, attr);
24 | Py_DECREF(module);
25 | return import;
26 | }
27 |
28 | static void module_free(void *self) {
29 | ModuleState *state = PyModule_GetState((PyObject *)self);
30 | ts_tree_cursor_delete(&state->default_cursor);
31 | Py_XDECREF(state->language_type);
32 | Py_XDECREF(state->log_type_type);
33 | Py_XDECREF(state->lookahead_iterator_type);
34 | Py_XDECREF(state->node_type);
35 | Py_XDECREF(state->parser_type);
36 | Py_XDECREF(state->point_type);
37 | Py_XDECREF(state->query_predicate_anyof_type);
38 | Py_XDECREF(state->query_predicate_eq_capture_type);
39 | Py_XDECREF(state->query_predicate_eq_string_type);
40 | Py_XDECREF(state->query_predicate_generic_type);
41 | Py_XDECREF(state->query_predicate_match_type);
42 | Py_XDECREF(state->query_type);
43 | Py_XDECREF(state->range_type);
44 | Py_XDECREF(state->tree_cursor_type);
45 | Py_XDECREF(state->tree_type);
46 | Py_XDECREF(state->query_error);
47 | Py_XDECREF(state->re_compile);
48 | }
49 |
50 | static struct PyModuleDef module_definition = {
51 | .m_base = PyModuleDef_HEAD_INIT,
52 | .m_name = "_binding",
53 | .m_doc = NULL,
54 | .m_size = sizeof(ModuleState),
55 | .m_free = module_free,
56 | };
57 |
58 | PyMODINIT_FUNC PyInit__binding(void) {
59 | PyObject *module = PyModule_Create(&module_definition);
60 | if (module == NULL) {
61 | return NULL;
62 | }
63 |
64 | ModuleState *state = PyModule_GetState(module);
65 |
66 | ts_set_allocator(PyMem_Malloc, PyMem_Calloc, PyMem_Realloc, PyMem_Free);
67 |
68 | state->language_type =
69 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &language_type_spec, NULL);
70 | state->lookahead_iterator_type =
71 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &lookahead_iterator_type_spec, NULL);
72 | state->node_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &node_type_spec, NULL);
73 | state->parser_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &parser_type_spec, NULL);
74 | state->query_predicate_anyof_type =
75 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_predicate_anyof_type_spec, NULL);
76 | state->query_predicate_eq_capture_type = (PyTypeObject *)PyType_FromModuleAndSpec(
77 | module, &query_predicate_eq_capture_type_spec, NULL);
78 | state->query_predicate_eq_string_type = (PyTypeObject *)PyType_FromModuleAndSpec(
79 | module, &query_predicate_eq_string_type_spec, NULL);
80 | state->query_predicate_generic_type =
81 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_predicate_generic_type_spec, NULL);
82 | state->query_predicate_match_type =
83 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_predicate_match_type_spec, NULL);
84 | state->query_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_type_spec, NULL);
85 | state->query_cursor_type =
86 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &query_cursor_type_spec, NULL);
87 | state->range_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &range_type_spec, NULL);
88 | state->tree_cursor_type =
89 | (PyTypeObject *)PyType_FromModuleAndSpec(module, &tree_cursor_type_spec, NULL);
90 | state->tree_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &tree_type_spec, NULL);
91 |
92 | if ((PyModule_AddObjectRef(module, "Language", (PyObject *)state->language_type) < 0) ||
93 | (PyModule_AddObjectRef(module, "LookaheadIterator",
94 | (PyObject *)state->lookahead_iterator_type) < 0) ||
95 | (PyModule_AddObjectRef(module, "Node", (PyObject *)state->node_type) < 0) ||
96 | (PyModule_AddObjectRef(module, "Parser", (PyObject *)state->parser_type) < 0) ||
97 | (PyModule_AddObjectRef(module, "Query", (PyObject *)state->query_type) < 0) ||
98 | (PyModule_AddObjectRef(module, "QueryCursor", (PyObject *)state->query_cursor_type) < 0) ||
99 | (PyModule_AddObjectRef(module, "QueryPredicateAnyof",
100 | (PyObject *)state->query_predicate_anyof_type) < 0) ||
101 | (PyModule_AddObjectRef(module, "QueryPredicateEqCapture",
102 | (PyObject *)state->query_predicate_eq_capture_type) < 0) ||
103 | (PyModule_AddObjectRef(module, "QueryPredicateEqString",
104 | (PyObject *)state->query_predicate_eq_string_type) < 0) ||
105 | (PyModule_AddObjectRef(module, "QueryPredicateGeneric",
106 | (PyObject *)state->query_predicate_generic_type) < 0) ||
107 | (PyModule_AddObjectRef(module, "QueryPredicateMatch",
108 | (PyObject *)state->query_predicate_match_type) < 0) ||
109 | (PyModule_AddObjectRef(module, "Range", (PyObject *)state->range_type) < 0) ||
110 | (PyModule_AddObjectRef(module, "Tree", (PyObject *)state->tree_type) < 0) ||
111 | (PyModule_AddObjectRef(module, "TreeCursor", (PyObject *)state->tree_cursor_type) < 0)) {
112 | goto cleanup;
113 | }
114 |
115 | state->query_error = PyErr_NewExceptionWithDoc(
116 | "tree_sitter.QueryError",
117 | PyDoc_STR("An error that occurred while attempting to create a :class:`Query`."),
118 | PyExc_ValueError, NULL);
119 | if (state->query_error == NULL ||
120 | PyModule_AddObjectRef(module, "QueryError", state->query_error) < 0) {
121 | goto cleanup;
122 | }
123 |
124 | state->re_compile = import_attribute("re", "compile");
125 | if (state->re_compile == NULL) {
126 | goto cleanup;
127 | }
128 |
129 | PyObject *namedtuple = import_attribute("collections", "namedtuple");
130 | if (namedtuple == NULL) {
131 | goto cleanup;
132 | }
133 | PyObject *point_args = Py_BuildValue("s[ss]", "Point", "row", "column");
134 | PyObject *point_kwargs = PyDict_New();
135 | PyDict_SetItemString(point_kwargs, "module", PyUnicode_FromString("tree_sitter"));
136 | state->point_type = (PyTypeObject *)PyObject_Call(namedtuple, point_args, point_kwargs);
137 | Py_DECREF(point_args);
138 | Py_DECREF(point_kwargs);
139 | Py_DECREF(namedtuple);
140 | if (state->point_type == NULL ||
141 | PyModule_AddObjectRef(module, "Point", (PyObject *)state->point_type) < 0) {
142 | goto cleanup;
143 | }
144 |
145 | PyObject *int_enum = import_attribute("enum", "IntEnum");
146 | if (int_enum == NULL) {
147 | goto cleanup;
148 | }
149 | state->log_type_type = (PyTypeObject *)PyObject_CallFunction(
150 | int_enum, "s{sisi}", "LogType", "PARSE", TSLogTypeParse, "LEX", TSLogTypeLex);
151 | if (state->log_type_type == NULL ||
152 | PyModule_AddObjectRef(module, "LogType", (PyObject *)state->log_type_type) < 0) {
153 | goto cleanup;
154 | };
155 | Py_DECREF(int_enum);
156 |
157 | PyModule_AddIntConstant(module, "LANGUAGE_VERSION", TREE_SITTER_LANGUAGE_VERSION);
158 | PyModule_AddIntConstant(module, "MIN_COMPATIBLE_LANGUAGE_VERSION",
159 | TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
160 |
161 | #ifdef Py_GIL_DISABLED
162 | PyUnstable_Module_SetGIL(module, Py_MOD_GIL_USED);
163 | #endif
164 | return module;
165 |
166 | cleanup:
167 | Py_XDECREF(module);
168 | return NULL;
169 | }
170 |
--------------------------------------------------------------------------------
/tree_sitter/binding/parser.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
/*
 * Evaluates to true when the local `name` was supplied (neither NULL nor None) and the
 * matching `parser_set_<name>` setter failed. Expects a `self` variable in scope.
 */
#define SET_ATTRIBUTE_ERROR(name)                                                                  \
    ((name) != NULL && (name) != Py_None && parser_set_##name(self, (name), NULL) < 0)
5 |
6 | typedef struct {
7 | PyObject *read_cb;
8 | PyObject *previous_retval;
9 | ModuleState *state;
10 | } ReadWrapperPayload;
11 |
12 | typedef struct {
13 | PyObject *callback;
14 | PyTypeObject *log_type_type;
15 | } LoggerPayload;
16 |
17 | static void free_logger(const TSParser *parser) {
18 | TSLogger logger = ts_parser_logger(parser);
19 | if (logger.payload != NULL) {
20 | PyMem_Free(logger.payload);
21 | }
22 | }
23 |
24 | PyObject *parser_new(PyTypeObject *cls, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) {
25 | Parser *self = (Parser *)cls->tp_alloc(cls, 0);
26 | if (self != NULL) {
27 | self->parser = ts_parser_new();
28 | self->language = NULL;
29 | self->logger = NULL;
30 | }
31 | return (PyObject *)self;
32 | }
33 |
34 | void parser_dealloc(Parser *self) {
35 | free_logger(self->parser);
36 | ts_parser_delete(self->parser);
37 | Py_XDECREF(self->language);
38 | Py_XDECREF(self->logger);
39 | Py_TYPE(self)->tp_free(self);
40 | }
41 |
42 | static const char *parser_read_wrapper(void *payload, uint32_t byte_offset, TSPoint position,
43 | uint32_t *bytes_read) {
44 | ReadWrapperPayload *wrapper_payload = (ReadWrapperPayload *)payload;
45 | PyObject *read_cb = wrapper_payload->read_cb;
46 |
47 | // We assume that the parser only needs the return value until the next time
48 | // this function is called or when ts_parser_parse() returns. We store the
49 | // return value from the callable in wrapper_payload->previous_return_value so
50 | // that its reference count will be decremented either during the next call to
51 | // this wrapper or after ts_parser_parse() has returned.
52 | Py_XDECREF(wrapper_payload->previous_retval);
53 | wrapper_payload->previous_retval = NULL;
54 |
55 | // Form arguments to callable.
56 | PyObject *byte_offset_obj = PyLong_FromUnsignedLong(byte_offset);
57 | PyObject *position_obj = POINT_NEW(wrapper_payload->state, position);
58 | if (!position_obj || !byte_offset_obj) {
59 | *bytes_read = 0;
60 | return NULL;
61 | }
62 |
63 | PyObject *args = PyTuple_Pack(2, byte_offset_obj, position_obj);
64 | Py_XDECREF(byte_offset_obj);
65 | Py_XDECREF(position_obj);
66 |
67 | // Call callable.
68 | PyObject *rv = PyObject_Call(read_cb, args, NULL);
69 | Py_XDECREF(args);
70 |
71 | // If error or None returned, we're done parsing.
72 | if (rv == NULL || rv == Py_None) {
73 | Py_XDECREF(rv);
74 | *bytes_read = 0;
75 | return NULL;
76 | }
77 |
78 | // If something other than None is returned, it must be a bytes object.
79 | if (!PyBytes_Check(rv)) {
80 | Py_XDECREF(rv);
81 | PyErr_SetString(PyExc_TypeError, "read callable must return a bytestring");
82 | *bytes_read = 0;
83 | return NULL;
84 | }
85 |
86 | // Store return value in payload so its reference count can be decremented and
87 | // return string representation of bytes.
88 | wrapper_payload->previous_retval = rv;
89 | *bytes_read = (uint32_t)PyBytes_Size(rv);
90 | return PyBytes_AsString(rv);
91 | }
92 |
93 | static bool parser_progress_callback(TSParseState *state) {
94 | PyObject *result = PyObject_CallFunction((PyObject *)state->payload, "Ip",
95 | state->current_byte_offset, state->has_error);
96 | return PyObject_IsTrue(result);
97 | }
98 |
99 | PyObject *parser_parse(Parser *self, PyObject *args, PyObject *kwargs) {
100 | ModuleState *state = GET_MODULE_STATE(self);
101 | PyObject *source_or_callback;
102 | PyObject *old_tree_obj = NULL;
103 | PyObject *encoding_obj = NULL;
104 | PyObject *progress_callback_obj = NULL;
105 | bool keep_text = true;
106 | char *keywords[] = {"", "old_tree", "encoding", "progress_callback", NULL};
107 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O!OO:parse", keywords, &source_or_callback,
108 | state->tree_type, &old_tree_obj, &encoding_obj,
109 | &progress_callback_obj)) {
110 | return NULL;
111 | }
112 |
113 | const TSTree *old_tree = old_tree_obj ? ((Tree *)old_tree_obj)->tree : NULL;
114 | TSInputEncoding input_encoding = TSInputEncodingUTF8;
115 | if (encoding_obj != NULL) {
116 | if (!PyUnicode_CheckExact(encoding_obj)) {
117 | PyErr_Format(PyExc_TypeError, "encoding must be str, not %s",
118 | encoding_obj->ob_type->tp_name);
119 | return NULL;
120 | } else if (PyUnicode_CompareWithASCIIString(encoding_obj, "utf8") == 0) {
121 | input_encoding = TSInputEncodingUTF8;
122 | } else if (PyUnicode_CompareWithASCIIString(encoding_obj, "utf16le") == 0) {
123 | input_encoding = TSInputEncodingUTF16LE;
124 | } else if (PyUnicode_CompareWithASCIIString(encoding_obj, "utf16be") == 0) {
125 | input_encoding = TSInputEncodingUTF16BE;
126 | } else if (PyUnicode_CompareWithASCIIString(encoding_obj, "utf16") == 0) {
127 | PyObject *byteorder = PySys_GetObject("byteorder");
128 | bool little_endian = PyUnicode_CompareWithASCIIString(byteorder, "little") == 0;
129 | input_encoding = little_endian ? TSInputEncodingUTF16LE : TSInputEncodingUTF16BE;
130 | } else {
131 | PyErr_Format(PyExc_ValueError,
132 | "encoding must be 'utf8', 'utf16', 'utf16le', or 'utf16be', not '%s'",
133 | PyUnicode_AsUTF8(encoding_obj));
134 | return NULL;
135 | }
136 | }
137 |
138 | TSTree *new_tree = NULL;
139 | Py_buffer source_view;
140 | if (PyObject_GetBuffer(source_or_callback, &source_view, PyBUF_SIMPLE) > -1) {
141 | if (progress_callback_obj != NULL) {
142 | const char *warning = "The progress_callback is ignored when parsing a bytestring";
143 | if (PyErr_WarnEx(PyExc_UserWarning, warning, 1) < 0) {
144 | return NULL;
145 | }
146 | }
147 | // parse a buffer
148 | const char *source_bytes = (const char *)source_view.buf;
149 | uint32_t length = (uint32_t)source_view.len;
150 | new_tree = ts_parser_parse_string_encoding(self->parser, old_tree, source_bytes, length,
151 | input_encoding);
152 | PyBuffer_Release(&source_view);
153 | } else if (PyCallable_Check(source_or_callback)) {
154 | // clear the GetBuffer error
155 | PyErr_Clear();
156 | // parse a callable
157 | ReadWrapperPayload payload = {
158 | .state = state,
159 | .read_cb = source_or_callback,
160 | .previous_retval = NULL,
161 | };
162 | TSInput input = {
163 | .payload = &payload,
164 | .read = parser_read_wrapper,
165 | .encoding = input_encoding,
166 | .decode = NULL,
167 | };
168 | if (progress_callback_obj == NULL) {
169 | new_tree = ts_parser_parse(self->parser, old_tree, input);
170 | } else if (!PyCallable_Check(progress_callback_obj)) {
171 | PyErr_Format(PyExc_TypeError, "progress_callback must be a callable, not %s",
172 | progress_callback_obj->ob_type->tp_name);
173 | return NULL;
174 | } else {
175 | TSParseOptions options = {
176 | .payload = progress_callback_obj,
177 | .progress_callback = parser_progress_callback,
178 | };
179 | new_tree = ts_parser_parse_with_options(self->parser, old_tree, input, options);
180 | }
181 | Py_XDECREF(payload.previous_retval);
182 |
183 | } else {
184 | PyErr_Format(PyExc_TypeError, "source must be a bytestring or a callable, not %s",
185 | source_or_callback->ob_type->tp_name);
186 | return NULL;
187 | }
188 |
189 | if (PyErr_Occurred()) {
190 | return NULL;
191 | }
192 | if (!new_tree) {
193 | PyErr_SetString(PyExc_ValueError, "Parsing failed");
194 | return NULL;
195 | }
196 |
197 | Tree *tree = PyObject_New(Tree, state->tree_type);
198 | if (tree == NULL) {
199 | return NULL;
200 | }
201 | tree->tree = new_tree;
202 | tree->language = self->language;
203 | tree->source = keep_text ? source_or_callback : Py_None;
204 | Py_INCREF(tree->source);
205 | return PyObject_Init((PyObject *)tree, state->tree_type);
206 | }
207 |
208 | PyObject *parser_reset(Parser *self, void *Py_UNUSED(payload)) {
209 | ts_parser_reset(self->parser);
210 | Py_RETURN_NONE;
211 | }
212 |
213 | PyObject *parser_print_dot_graphs(Parser *self, PyObject *arg) {
214 | if (arg == Py_None) {
215 | ts_parser_print_dot_graphs(self->parser, -1);
216 | } else {
217 | int fd = PyObject_AsFileDescriptor(arg);
218 | if (fd < 0) {
219 | return NULL;
220 | }
221 | Py_BEGIN_ALLOW_THREADS
222 | ts_parser_print_dot_graphs(self->parser, fd);
223 | Py_END_ALLOW_THREADS
224 | }
225 | Py_RETURN_NONE;
226 | }
227 |
228 | PyObject *parser_get_timeout_micros(Parser *self, void *Py_UNUSED(payload)) {
229 | if (DEPRECATE("Use the progress_callback in parse()") < 0) {
230 | return NULL;
231 | }
232 | return PyLong_FromUnsignedLong(ts_parser_timeout_micros(self->parser));
233 | }
234 |
235 | int parser_set_timeout_micros(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) {
236 | if (DEPRECATE("Use the progress_callback in parse()") < 0) {
237 | return -1;
238 | }
239 | if (arg == NULL || arg == Py_None) {
240 | ts_parser_set_timeout_micros(self->parser, 0);
241 | return 0;
242 | }
243 | if (!PyLong_Check(arg)) {
244 | PyErr_Format(PyExc_TypeError, "'timeout_micros' must be assigned an int, not %s",
245 | arg->ob_type->tp_name);
246 | return -1;
247 | }
248 |
249 | ts_parser_set_timeout_micros(self->parser, PyLong_AsSize_t(arg));
250 | return 0;
251 | }
252 |
253 | PyObject *parser_get_included_ranges(Parser *self, void *Py_UNUSED(payload)) {
254 | uint32_t count;
255 | const TSRange *ranges = ts_parser_included_ranges(self->parser, &count);
256 | if (count == 0) {
257 | return PyList_New(0);
258 | }
259 |
260 | ModuleState *state = GET_MODULE_STATE(self);
261 | PyObject *list = PyList_New(count);
262 | for (uint32_t i = 0; i < count; ++i) {
263 | Range *range = PyObject_New(Range, state->range_type);
264 | if (range == NULL) {
265 | return NULL;
266 | }
267 | range->range = ranges[i];
268 | PyList_SET_ITEM(list, i, PyObject_Init((PyObject *)range, state->range_type));
269 | }
270 | return list;
271 | }
272 |
273 | int parser_set_included_ranges(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) {
274 | if (arg == NULL || arg == Py_None) {
275 | ts_parser_set_included_ranges(self->parser, NULL, 0);
276 | return 0;
277 | }
278 | if (!PyList_Check(arg)) {
279 | PyErr_Format(PyExc_TypeError, "'included_ranges' must be assigned a list, not %s",
280 | arg->ob_type->tp_name);
281 | return -1;
282 | }
283 |
284 | uint32_t length = (uint32_t)PyList_Size(arg);
285 | TSRange *ranges = PyMem_Calloc(length, sizeof(TSRange));
286 | if (!ranges) {
287 | PyErr_Format(PyExc_MemoryError, "Failed to allocate memory for ranges of length %u",
288 | length);
289 | return -1;
290 | }
291 |
292 | ModuleState *state = GET_MODULE_STATE(self);
293 | for (uint32_t i = 0; i < length; ++i) {
294 | PyObject *range = PyList_GetItem(arg, i);
295 | if (!PyObject_IsInstance(range, (PyObject *)state->range_type)) {
296 | PyErr_Format(PyExc_TypeError, "Item at index %u is not a tree_sitter.Range object", i);
297 | PyMem_Free(ranges);
298 | return -1;
299 | }
300 | ranges[i] = ((Range *)range)->range;
301 | }
302 |
303 | if (!ts_parser_set_included_ranges(self->parser, ranges, length)) {
304 | PyErr_SetString(PyExc_ValueError, "Included ranges cannot overlap");
305 | PyMem_Free(ranges);
306 | return -1;
307 | }
308 |
309 | PyMem_Free(ranges);
310 | return 0;
311 | }
312 |
313 | PyObject *parser_get_language(Parser *self, void *Py_UNUSED(payload)) {
314 | if (!self->language) {
315 | Py_RETURN_NONE;
316 | }
317 | return Py_NewRef(self->language);
318 | }
319 |
320 | PyObject *parser_get_logger(Parser *self, void *Py_UNUSED(payload)) {
321 | if (!self->logger) {
322 | Py_RETURN_NONE;
323 | }
324 | return Py_NewRef(self->logger);
325 | }
326 |
327 | static void log_callback(void *payload, TSLogType log_type, const char *buffer) {
328 | LoggerPayload *logger_payload = (LoggerPayload *)payload;
329 | PyObject *log_type_enum =
330 | PyObject_CallFunction((PyObject *)logger_payload->log_type_type, "i", log_type);
331 | PyObject_CallFunction(logger_payload->callback, "Os", log_type_enum, buffer);
332 | }
333 |
334 | int parser_set_logger(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) {
335 | free_logger(self->parser);
336 |
337 | if (arg == NULL || arg == Py_None) {
338 | Py_XDECREF(self->logger);
339 | self->logger = NULL;
340 | TSLogger logger = {NULL, NULL};
341 | ts_parser_set_logger(self->parser, logger);
342 | return 0;
343 | }
344 | if (!PyCallable_Check(arg)) {
345 | PyErr_Format(PyExc_TypeError, "logger must be assigned a callable object, not %s",
346 | arg->ob_type->tp_name);
347 | return -1;
348 | }
349 |
350 | Py_XSETREF(self->logger, Py_NewRef(arg));
351 |
352 | ModuleState *state = GET_MODULE_STATE(self);
353 | LoggerPayload *payload = PyMem_Malloc(sizeof(LoggerPayload));
354 | payload->callback = self->logger;
355 | payload->log_type_type = state->log_type_type;
356 | TSLogger logger = {payload, log_callback};
357 | ts_parser_set_logger(self->parser, logger);
358 |
359 | return 0;
360 | }
361 |
362 | int parser_set_language(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) {
363 | if (arg == NULL || arg == Py_None) {
364 | self->language = NULL;
365 | return 0;
366 | }
367 | if (!IS_INSTANCE(arg, language_type)) {
368 | PyErr_Format(PyExc_TypeError,
369 | "language must be assigned a tree_sitter.Language object, not %s",
370 | arg->ob_type->tp_name);
371 | return -1;
372 | }
373 |
374 | Language *language = (Language *)arg;
375 | if (language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION ||
376 | TREE_SITTER_LANGUAGE_VERSION < language->abi_version) {
377 | PyErr_Format(PyExc_ValueError,
378 | "Incompatible Language version %u. Must be between %u and %u",
379 | language->abi_version, TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION,
380 | TREE_SITTER_LANGUAGE_VERSION);
381 | return -1;
382 | }
383 |
384 | if (!ts_parser_set_language(self->parser, language->language)) {
385 | PyErr_SetString(PyExc_RuntimeError, "Failed to set the parser language");
386 | return -1;
387 | }
388 |
389 | Py_XSETREF(self->language, Py_NewRef(language));
390 | return 0;
391 | }
392 |
393 | int parser_init(Parser *self, PyObject *args, PyObject *kwargs) {
394 | ModuleState *state = GET_MODULE_STATE(self);
395 | PyObject *language = NULL, *included_ranges = NULL, *timeout_micros = NULL, *logger = NULL;
396 | char *keywords[] = {"language", "included_ranges", "timeout_micros", "logger", NULL};
397 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!$OOO:__init__", keywords,
398 | state->language_type, &language, &included_ranges,
399 | &timeout_micros, &logger)) {
400 | return -1;
401 | }
402 |
403 | if (SET_ATTRIBUTE_ERROR(language)) {
404 | return -1;
405 | }
406 | if (SET_ATTRIBUTE_ERROR(included_ranges)) {
407 | return -1;
408 | }
409 | if (SET_ATTRIBUTE_ERROR(timeout_micros)) {
410 | return -1;
411 | }
412 | if (SET_ATTRIBUTE_ERROR(logger)) {
413 | return -1;
414 | }
415 | return 0;
416 | }
417 |
418 | PyDoc_STRVAR(
419 | parser_parse_doc,
420 | "parse(self, source, /, old_tree=None, encoding=\"utf8\")\n--\n\n"
421 | "Parse a slice of a bytestring or bytes provided in chunks by a callback.\n\n"
422 | "The callback function takes a byte offset and position and returns a bytestring starting "
423 | "at that offset and position. The slices can be of any length. If the given position "
424 | "is at the end of the text, the callback should return an empty slice." DOC_RETURNS
425 | "A :class:`Tree` if parsing succeeded or ``None`` if the parser does not have an "
426 | "assigned language or the timeout expired.");
427 | PyDoc_STRVAR(
428 | parser_reset_doc,
429 | "reset(self, /)\n--\n\n"
430 | "Instruct the parser to start the next parse from the beginning." DOC_NOTE
431 | "If the parser previously failed because of a timeout, then by default, it will resume where "
432 | "it left off on the next call to :meth:`parse`.\nIf you don't want to resume, and instead "
433 | "intend to use this parser to parse some other document, you must call :meth:`reset` first.");
434 | PyDoc_STRVAR(parser_print_dot_graphs_doc,
435 | "print_dot_graphs(self, /, file)\n--\n\n"
436 | "Set the file descriptor to which the parser should write debugging "
437 | "graphs during parsing. The graphs are formatted in the DOT language. "
438 | "You can turn off this logging by passing ``None``.");
439 |
440 | static PyMethodDef parser_methods[] = {
441 | {
442 | .ml_name = "parse",
443 | .ml_meth = (PyCFunction)parser_parse,
444 | .ml_flags = METH_VARARGS | METH_KEYWORDS,
445 | .ml_doc = parser_parse_doc,
446 | },
447 | {
448 | .ml_name = "reset",
449 | .ml_meth = (PyCFunction)parser_reset,
450 | .ml_flags = METH_NOARGS,
451 | .ml_doc = parser_reset_doc,
452 | },
453 | {
454 | .ml_name = "print_dot_graphs",
455 | .ml_meth = (PyCFunction)parser_print_dot_graphs,
456 | .ml_flags = METH_O,
457 | .ml_doc = parser_print_dot_graphs_doc,
458 | },
459 | {NULL},
460 | };
461 |
462 | static PyGetSetDef parser_accessors[] = {
463 | {"language", (getter)parser_get_language, (setter)parser_set_language,
464 | PyDoc_STR("The language that will be used for parsing."), NULL},
465 | {"included_ranges", (getter)parser_get_included_ranges, (setter)parser_set_included_ranges,
466 | PyDoc_STR("The ranges of text that the parser will include when parsing."), NULL},
467 | {"timeout_micros", (getter)parser_get_timeout_micros, (setter)parser_set_timeout_micros,
468 | PyDoc_STR("The duration in microseconds that parsing is allowed to take."), NULL},
469 | {"logger", (getter)parser_get_logger, (setter)parser_set_logger,
470 | PyDoc_STR("The logger that the parser should use during parsing."), NULL},
471 | {NULL},
472 | };
473 |
474 | static PyType_Slot parser_type_slots[] = {
475 | {Py_tp_doc,
476 | PyDoc_STR("A class that is used to produce a :class:`Tree` based on some source code.")},
477 | {Py_tp_new, parser_new},
478 | {Py_tp_init, parser_init},
479 | {Py_tp_dealloc, parser_dealloc},
480 | {Py_tp_methods, parser_methods},
481 | {Py_tp_getset, parser_accessors},
482 | {0, NULL},
483 | };
484 |
485 | PyType_Spec parser_type_spec = {
486 | .name = "tree_sitter.Parser",
487 | .basicsize = sizeof(Parser),
488 | .itemsize = 0,
489 | .flags = Py_TPFLAGS_DEFAULT,
490 | .slots = parser_type_slots,
491 | };
492 |
--------------------------------------------------------------------------------
/tree_sitter/binding/query_cursor.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | #include
4 |
5 | PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree);
6 |
7 | bool query_satisfies_predicates(Query *query, TSQueryMatch match, Tree *tree, PyObject *callable);
8 |
9 | void query_cursor_dealloc(QueryCursor *self) {
10 | ts_query_cursor_delete(self->cursor);
11 | Py_XDECREF(self->query);
12 | Py_TYPE(self)->tp_free(self);
13 | }
14 |
15 | PyObject *query_cursor_new(PyTypeObject *cls, PyObject *Py_UNUSED(args),
16 | PyObject *Py_UNUSED(kwargs)) {
17 | QueryCursor *self = (QueryCursor *)cls->tp_alloc(cls, 0);
18 | if (self != NULL) {
19 | self->cursor = ts_query_cursor_new();
20 | }
21 | return (PyObject *)self;
22 | }
23 |
24 | int query_cursor_init(QueryCursor *self, PyObject *args, PyObject *kwargs) {
25 | ModuleState *state = GET_MODULE_STATE(self);
26 | PyObject *query = NULL;
27 | uint32_t match_limit = UINT32_MAX;
28 | uint64_t timeout_micros = 0;
29 | char *keywords[] = {"query", "match_limit", "timeout_micros", NULL};
30 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|$II:__init__", keywords, state->query_type,
31 | &query, &match_limit, &timeout_micros)) {
32 | return -1;
33 | }
34 |
35 | self->query = Py_NewRef(query);
36 | ts_query_cursor_set_match_limit(self->cursor, match_limit);
37 | ts_query_cursor_set_timeout_micros(self->cursor, timeout_micros);
38 |
39 | return 0;
40 | }
41 |
42 | PyObject *query_cursor_set_max_start_depth(QueryCursor *self, PyObject *args) {
43 | uint32_t max_start_depth;
44 | if (!PyArg_ParseTuple(args, "I:set_max_start_depth", &max_start_depth)) {
45 | return NULL;
46 | }
47 | ts_query_cursor_set_max_start_depth(self->cursor, max_start_depth);
48 | return Py_NewRef(self);
49 | }
50 |
51 | PyObject *query_cursor_set_byte_range(QueryCursor *self, PyObject *args) {
52 | uint32_t start_byte, end_byte;
53 | if (!PyArg_ParseTuple(args, "II:set_byte_range", &start_byte, &end_byte)) {
54 | return NULL;
55 | }
56 | if (!ts_query_cursor_set_byte_range(self->cursor, start_byte, end_byte)) {
57 | PyErr_SetString(PyExc_ValueError, "Invalid byte range");
58 | return NULL;
59 | }
60 | return Py_NewRef(self);
61 | }
62 |
63 | PyObject *query_cursor_set_point_range(QueryCursor *self, PyObject *args) {
64 | TSPoint start_point, end_point;
65 | if (!PyArg_ParseTuple(args, "(II)(II):set_point_range", &start_point.row, &start_point.column,
66 | &end_point.row, &end_point.column)) {
67 | return NULL;
68 | }
69 | if (!ts_query_cursor_set_point_range(self->cursor, start_point, end_point)) {
70 | PyErr_SetString(PyExc_ValueError, "Invalid point range");
71 | return NULL;
72 | }
73 | return Py_NewRef(self);
74 | }
75 |
76 | static bool query_cursor_progress_callback(TSQueryCursorState *state) {
77 | PyObject *result =
78 | PyObject_CallFunction((PyObject *)state->payload, "I", state->current_byte_offset);
79 | return PyObject_IsTrue(result);
80 | }
81 |
82 | PyObject *query_cursor_matches(QueryCursor *self, PyObject *args, PyObject *kwargs) {
83 | ModuleState *state = GET_MODULE_STATE(self);
84 | char *keywords[] = {"node", "predicate", "progress_callback", NULL};
85 | PyObject *node_obj, *predicate = NULL, *progress_callback_obj = NULL;
86 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|OO:matches", keywords, state->node_type,
87 | &node_obj, &predicate, &progress_callback_obj)) {
88 | return NULL;
89 | }
90 | if (predicate != NULL && !PyCallable_Check(predicate)) {
91 | PyErr_Format(PyExc_TypeError, "predicate must be a callable, not %s",
92 | predicate->ob_type->tp_name);
93 | return NULL;
94 | }
95 | if (progress_callback_obj != NULL && !PyCallable_Check(progress_callback_obj)) {
96 | PyErr_Format(PyExc_TypeError, "progress_callback must be a callable, not %s",
97 | progress_callback_obj->ob_type->tp_name);
98 | return NULL;
99 | }
100 |
101 | PyObject *result = PyList_New(0);
102 | if (result == NULL) {
103 | return NULL;
104 | }
105 |
106 | TSQueryMatch match;
107 | uint32_t name_length;
108 | Node *node = (Node *)node_obj;
109 | Query *query = (Query *)self->query;
110 | if (progress_callback_obj == NULL) {
111 | ts_query_cursor_exec(self->cursor, query->query, node->node);
112 | } else {
113 | TSQueryCursorOptions options = {
114 | .payload = progress_callback_obj,
115 | .progress_callback = query_cursor_progress_callback,
116 | };
117 | ts_query_cursor_exec_with_options(self->cursor, query->query, node->node, &options);
118 | }
119 | while (ts_query_cursor_next_match(self->cursor, &match)) {
120 | if (!query_satisfies_predicates(query, match, (Tree *)node->tree, predicate)) {
121 | continue;
122 | }
123 |
124 | PyObject *captures_for_match = PyDict_New();
125 | for (uint16_t i = 0; i < match.capture_count; ++i) {
126 | TSQueryCapture capture = match.captures[i];
127 | const char *capture_name =
128 | ts_query_capture_name_for_id(query->query, capture.index, &name_length);
129 | PyObject *capture_name_obj = PyUnicode_FromStringAndSize(capture_name, name_length);
130 | PyObject *capture_node = node_new_internal(state, capture.node, node->tree);
131 | PyObject *default_list = PyList_New(0);
132 | PyObject *capture_list =
133 | PyDict_SetDefault(captures_for_match, capture_name_obj, default_list);
134 | Py_DECREF(capture_name_obj);
135 | Py_DECREF(default_list);
136 | PyList_Append(capture_list, capture_node);
137 | Py_XDECREF(capture_node);
138 | }
139 | PyObject *pattern_index = PyLong_FromSize_t(match.pattern_index);
140 | PyObject *tuple_match = PyTuple_Pack(2, pattern_index, captures_for_match);
141 | Py_DECREF(pattern_index);
142 | Py_DECREF(captures_for_match);
143 | PyList_Append(result, tuple_match);
144 | Py_XDECREF(tuple_match);
145 | }
146 |
147 | return PyErr_Occurred() == NULL ? result : NULL;
148 | }
149 |
150 | PyObject *query_cursor_captures(QueryCursor *self, PyObject *args, PyObject *kwargs) {
151 | ModuleState *state = GET_MODULE_STATE(self);
152 | char *keywords[] = {"node", "predicate", "progress_callback", NULL};
153 | PyObject *node_obj, *predicate = NULL, *progress_callback_obj = NULL;
154 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|OO:captures", keywords, state->node_type,
155 | &node_obj, &predicate, &progress_callback_obj)) {
156 | return NULL;
157 | }
158 | if (predicate != NULL && !PyCallable_Check(predicate)) {
159 | PyErr_Format(PyExc_TypeError, "predicate must be a callable, not %s",
160 | predicate->ob_type->tp_name);
161 | return NULL;
162 | }
163 | if (progress_callback_obj != NULL && !PyCallable_Check(progress_callback_obj)) {
164 | PyErr_Format(PyExc_TypeError, "progress_callback must be a callable, not %s",
165 | progress_callback_obj->ob_type->tp_name);
166 | return NULL;
167 | }
168 |
169 | PyObject *result = PyDict_New();
170 | if (result == NULL) {
171 | return NULL;
172 | }
173 |
174 | uint32_t capture_index;
175 | TSQueryMatch match;
176 | uint32_t name_length;
177 | Node *node = (Node *)node_obj;
178 | Query *query = (Query *)self->query;
179 | if (progress_callback_obj == NULL) {
180 | ts_query_cursor_exec(self->cursor, query->query, node->node);
181 | } else {
182 | TSQueryCursorOptions options = {
183 | .payload = progress_callback_obj,
184 | .progress_callback = query_cursor_progress_callback,
185 | };
186 | ts_query_cursor_exec_with_options(self->cursor, query->query, node->node, &options);
187 | }
188 | while (ts_query_cursor_next_capture(self->cursor, &match, &capture_index)) {
189 | if (!query_satisfies_predicates(query, match, (Tree *)node->tree, predicate)) {
190 | continue;
191 | }
192 | if (PyErr_Occurred()) {
193 | return NULL;
194 | }
195 |
196 | TSQueryCapture capture = match.captures[capture_index];
197 | const char *capture_name =
198 | ts_query_capture_name_for_id(query->query, capture.index, &name_length);
199 | PyObject *capture_name_obj = PyUnicode_FromStringAndSize(capture_name, name_length);
200 | PyObject *capture_node = node_new_internal(state, capture.node, node->tree);
201 | PyObject *default_set = PySet_New(NULL);
202 | PyObject *capture_set = PyDict_SetDefault(result, capture_name_obj, default_set);
203 | Py_DECREF(capture_name_obj);
204 | Py_DECREF(default_set);
205 | PySet_Add(capture_set, capture_node);
206 | Py_XDECREF(capture_node);
207 | }
208 |
209 | Py_ssize_t pos = 0;
210 | PyObject *key, *value;
211 | // convert each set to a list so it can be subscriptable
212 | while (PyDict_Next(result, &pos, &key, &value)) {
213 | PyObject *list = PySequence_List(value);
214 | PyDict_SetItem(result, key, list);
215 | Py_DECREF(list);
216 | }
217 |
218 | return PyErr_Occurred() == NULL ? result : NULL;
219 | }
220 |
221 | PyObject *query_cursor_get_did_exceed_match_limit(QueryCursor *self, void *Py_UNUSED(payload)) {
222 | return PyLong_FromSize_t(ts_query_cursor_did_exceed_match_limit(self->cursor));
223 | }
224 |
225 | PyObject *query_cursor_get_match_limit(QueryCursor *self, void *Py_UNUSED(payload)) {
226 | return PyLong_FromUnsignedLong(ts_query_cursor_match_limit(self->cursor));
227 | }
228 |
229 | int query_cursor_set_match_limit(QueryCursor *self, PyObject *arg, void *Py_UNUSED(payload)) {
230 | if (arg == NULL || arg == Py_None) {
231 | ts_query_cursor_set_match_limit(self->cursor, UINT32_MAX);
232 | return 0;
233 | }
234 | if (!PyLong_Check(arg)) {
235 | PyErr_Format(PyExc_TypeError, "'match_limit' must be assigned an int, not %s",
236 | arg->ob_type->tp_name);
237 | return -1;
238 | }
239 |
240 | ts_query_cursor_set_timeout_micros(self->cursor, PyLong_AsSize_t(arg));
241 | return 0;
242 | }
243 |
244 | PyObject *query_cursor_get_timeout_micros(QueryCursor *self, void *Py_UNUSED(payload)) {
245 | if (DEPRECATE("Use the progress_callback in matches() or captures()") < 0) {
246 | return NULL;
247 | }
248 | return PyLong_FromUnsignedLong(ts_query_cursor_timeout_micros(self->cursor));
249 | }
250 |
251 | int query_cursor_set_timeout_micros(QueryCursor *self, PyObject *arg, void *Py_UNUSED(payload)) {
252 | if (DEPRECATE("Use the progress_callback in matches() or captures()") < 0) {
253 | return -1;
254 | }
255 | if (arg == NULL || arg == Py_None) {
256 | ts_query_cursor_set_timeout_micros(self->cursor, 0);
257 | return 0;
258 | }
259 | if (!PyLong_Check(arg)) {
260 | PyErr_Format(PyExc_TypeError, "'timeout_micros' must be assigned an int, not %s",
261 | arg->ob_type->tp_name);
262 | return -1;
263 | }
264 |
265 | ts_query_cursor_set_timeout_micros(self->cursor, PyLong_AsSize_t(arg));
266 | return 0;
267 | }
268 |
269 | PyDoc_STRVAR(query_cursor_set_max_start_depth_doc,
270 | "set_max_start_depth(self, max_start_depth)\n--\n\n"
271 | "Set the maximum start depth for the query.");
272 | PyDoc_STRVAR(query_cursor_set_byte_range_doc,
273 | "set_byte_range(self, start, end)\n--\n\n"
274 | "Set the range of bytes in which the query will be executed." DOC_RAISES
275 | "ValueError\n\n If the start byte exceeds the end byte." DOC_NOTE
276 | "The query cursor will return matches that intersect with the given byte range. "
277 | "This means that a match may be returned even if some of its captures fall outside "
278 | "the specified range, as long as at least part of the match overlaps with it.");
279 | PyDoc_STRVAR(query_cursor_set_point_range_doc,
280 | "set_point_range(self, start, end)\n--\n\n"
281 | "Set the range of points in which the query will be executed." DOC_RAISES
282 | "ValueError\n\n If the start point exceeds the end point." DOC_NOTE
283 | "The query cursor will return matches that intersect with the given point range. "
284 | "This means that a match may be returned even if some of its captures fall outside "
285 | "the specified range, as long as at least part of the match overlaps with it.");
286 | PyDoc_STRVAR(query_cursor_matches_doc,
287 | "matches(self, node, /, predicate=None, progress_callback=None)\n--\n\n"
288 | "Get a list of *matches* within the given node." DOC_RETURNS
289 | "A list of tuples where the first element is the pattern index and "
290 | "the second element is a dictionary that maps capture names to nodes.");
291 | PyDoc_STRVAR(query_cursor_captures_doc,
292 | "captures(self, node, /, predicate=None, progress_callback=None)\n--\n\n"
293 | "Get a list of *captures* within the given node.\n\n" DOC_RETURNS
294 | "A dict where the keys are the names of the captures and the values are "
295 | "lists of the captured nodes." DOC_HINT "This method returns all of the"
296 | "captures while :meth:`matches` only returns the last match.");
297 |
298 | static PyMethodDef query_cursor_methods[] = {
299 | {
300 | .ml_name = "set_max_start_depth",
301 | .ml_meth = (PyCFunction)query_cursor_set_max_start_depth,
302 | .ml_flags = METH_VARARGS,
303 | .ml_doc = query_cursor_set_max_start_depth_doc,
304 | },
305 | {
306 | .ml_name = "set_byte_range",
307 | .ml_meth = (PyCFunction)query_cursor_set_byte_range,
308 | .ml_flags = METH_VARARGS,
309 | .ml_doc = query_cursor_set_byte_range_doc,
310 | },
311 | {
312 | .ml_name = "set_point_range",
313 | .ml_meth = (PyCFunction)query_cursor_set_point_range,
314 | .ml_flags = METH_VARARGS,
315 | .ml_doc = query_cursor_set_point_range_doc,
316 | },
317 | {
318 | .ml_name = "matches",
319 | .ml_meth = (PyCFunction)query_cursor_matches,
320 | .ml_flags = METH_VARARGS | METH_KEYWORDS,
321 | .ml_doc = query_cursor_matches_doc,
322 | },
323 | {
324 | .ml_name = "captures",
325 | .ml_meth = (PyCFunction)query_cursor_captures,
326 | .ml_flags = METH_VARARGS | METH_KEYWORDS,
327 | .ml_doc = query_cursor_captures_doc,
328 | },
329 | {NULL},
330 | };
331 |
332 | static PyGetSetDef query_cursor_accessors[] = {
333 | {"timeout_micros", (getter)query_cursor_get_timeout_micros,
334 | (setter)query_cursor_set_timeout_micros,
335 | PyDoc_STR("The maximum duration in microseconds that query "
336 | "execution should be allowed to take before halting."),
337 | NULL},
338 | {"match_limit", (getter)query_cursor_get_match_limit, (setter)query_cursor_set_match_limit,
339 | PyDoc_STR("The maximum number of in-progress matches."), NULL},
340 | {"did_exceed_match_limit", (getter)query_cursor_get_did_exceed_match_limit, NULL,
341 | PyDoc_STR("Check if the query exceeded its maximum number of "
342 | "in-progress matches during its last execution."),
343 | NULL},
344 | {NULL},
345 | };
346 |
347 | static PyType_Slot query_cursor_type_slots[] = {
348 | {Py_tp_doc,
349 | PyDoc_STR("A class for executing a :class:`Query` on a syntax :class:`Tree`.")},
350 | {Py_tp_new, query_cursor_new},
351 | {Py_tp_init, query_cursor_init},
352 | {Py_tp_dealloc, query_cursor_dealloc},
353 | {Py_tp_methods, query_cursor_methods},
354 | {Py_tp_getset, query_cursor_accessors},
355 | {0, NULL},
356 | };
357 |
358 | PyType_Spec query_cursor_type_spec = {
359 | .name = "tree_sitter.QueryCursor",
360 | .basicsize = sizeof(QueryCursor),
361 | .itemsize = 0,
362 | .flags = Py_TPFLAGS_DEFAULT,
363 | .slots = query_cursor_type_slots,
364 | };
365 |
--------------------------------------------------------------------------------
/tree_sitter/binding/query_predicates.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree);
4 |
5 | PyObject *node_get_text(Node *self, void *payload);
6 |
// Compare two objects with `==` for a positive predicate and `!=` for a negated one.
// Evaluates to the result of PyObject_RichCompareBool (1 true, 0 false, -1 error).
#define PREDICATE_CMP(val1, val2, predicate)                                                       \
    PyObject_RichCompareBool((val1), (val2), !(predicate)->is_positive ? Py_NE : Py_EQ)

// Leave the enclosing loop as soon as the outcome is decided: on the first success of an
// "any" predicate, or on the first non-success of an "all" predicate.
// clang-format off
#define PREDICATE_BREAK(predicate, result) \
    if (((result) == 1) == ((predicate)->is_any != 0)) break
// clang-format on
14 |
15 | static inline PyObject *nodes_for_capture_index(ModuleState *state, uint32_t index,
16 | TSQueryMatch *match, Tree *tree) {
17 | PyObject *result = PyList_New(0);
18 | for (uint16_t i = 0; i < match->capture_count; ++i) {
19 | TSQueryCapture capture = match->captures[i];
20 | if (capture.index == index) {
21 | PyObject *node = node_new_internal(state, capture.node, (PyObject *)tree);
22 | PyList_Append(result, node);
23 | Py_XDECREF(node);
24 | }
25 | }
26 | return result;
27 | }
28 |
29 | static inline PyObject *captures_for_match(ModuleState *state, TSQuery *query, TSQueryMatch *match,
30 | Tree *tree) {
31 | uint32_t name_length;
32 | PyObject *captures = PyDict_New();
33 | for (uint32_t j = 0; j < match->capture_count; ++j) {
34 | TSQueryCapture capture = match->captures[j];
35 | const char *capture_name =
36 | ts_query_capture_name_for_id(query, capture.index, &name_length);
37 | PyObject *capture_name_obj = PyUnicode_FromStringAndSize(capture_name, name_length);
38 | if (capture_name_obj == NULL) {
39 | return NULL;
40 | }
41 | PyObject *nodes = nodes_for_capture_index(state, capture.index, match, tree);
42 | if (PyDict_SetItem(captures, capture_name_obj, nodes) == -1) {
43 | return NULL;
44 | }
45 | Py_DECREF(capture_name_obj);
46 | }
47 | return captures;
48 | }
49 |
50 | static inline bool satisfies_anyof(ModuleState *state, QueryPredicateAnyOf *predicate,
51 | TSQueryMatch *match, Tree *tree) {
52 | PyObject *nodes = nodes_for_capture_index(state, predicate->capture_id, match, tree);
53 | for (size_t i = 0, l = (size_t)PyList_Size(nodes); i < l; ++i) {
54 | Node *node = (Node *)PyList_GetItem(nodes, i);
55 | PyObject *text1 = node_get_text(node, NULL);
56 | bool found_match = false;
57 |
58 | for (size_t j = 0, k = (size_t)PyList_Size(predicate->values); j < k; ++j) {
59 | PyObject *text2 = PyList_GetItem(predicate->values, j);
60 | if (PREDICATE_CMP(text1, text2, predicate) == 1) {
61 | found_match = true;
62 | break;
63 | }
64 | }
65 |
66 | Py_DECREF(text1);
67 |
68 | if (!found_match) {
69 | Py_DECREF(nodes);
70 | return false;
71 | }
72 | }
73 |
74 | Py_DECREF(nodes);
75 | return true;
76 | }
77 |
78 | static inline bool satisfies_eq_capture(ModuleState *state, QueryPredicateEqCapture *predicate,
79 | TSQueryMatch *match, Tree *tree) {
80 | PyObject *nodes1 = nodes_for_capture_index(state, predicate->capture1_id, match, tree),
81 | *nodes2 = nodes_for_capture_index(state, predicate->capture2_id, match, tree);
82 | PyObject *text1, *text2;
83 | size_t size1 = (size_t)PyList_Size(nodes1), size2 = (size_t)PyList_Size(nodes2);
84 | int result = 1;
85 | for (size_t i = 0, l = size1 < size2 ? size1 : size2; i < l; ++i) {
86 | text1 = node_get_text((Node *)PyList_GetItem(nodes1, i), NULL);
87 | text2 = node_get_text((Node *)PyList_GetItem(nodes2, i), NULL);
88 | result = PREDICATE_CMP(text1, text2, predicate);
89 | Py_DECREF(text1);
90 | Py_DECREF(text2);
91 | PREDICATE_BREAK(predicate, result);
92 | }
93 | Py_DECREF(nodes1);
94 | Py_DECREF(nodes2);
95 | return result == 1;
96 | }
97 |
98 | static inline bool satisfies_eq_string(ModuleState *state, QueryPredicateEqString *predicate,
99 | TSQueryMatch *match, Tree *tree) {
100 | PyObject *nodes = nodes_for_capture_index(state, predicate->capture_id, match, tree);
101 | PyObject *text1, *text2 = predicate->string_value;
102 | int result = 1;
103 | for (size_t i = 0, l = (size_t)PyList_Size(nodes); i < l; ++i) {
104 | text1 = node_get_text((Node *)PyList_GetItem(nodes, i), NULL);
105 | result = PREDICATE_CMP(text1, text2, predicate);
106 | Py_DECREF(text1);
107 | PREDICATE_BREAK(predicate, result);
108 | }
109 | Py_DECREF(nodes);
110 | return result == 1;
111 | }
112 |
113 | static inline bool satisfies_match(ModuleState *state, QueryPredicateMatch *predicate,
114 | TSQueryMatch *match, Tree *tree) {
115 | PyObject *nodes = nodes_for_capture_index(state, predicate->capture_id, match, tree);
116 | PyObject *text, *search_result;
117 | int result = 1;
118 | for (size_t i = 0, l = (size_t)PyList_Size(nodes); i < l; ++i) {
119 | text = node_get_text((Node *)PyList_GetItem(nodes, i), NULL);
120 | search_result =
121 | PyObject_CallMethod(predicate->pattern, "search", "s", PyBytes_AsString(text));
122 | result = (search_result != NULL && search_result != Py_None) == predicate->is_positive;
123 | Py_DECREF(text);
124 | Py_XDECREF(search_result);
125 | PREDICATE_BREAK(predicate, result);
126 | }
127 | Py_DECREF(nodes);
128 | return result == 1;
129 | }
130 |
131 | bool query_satisfies_predicates(Query *query, TSQueryMatch match, Tree *tree, PyObject *callable) {
132 | // if there is no source, ignore the predicates
133 | if (tree->source == NULL || tree->source == Py_None) {
134 | return true;
135 | }
136 |
137 | ModuleState *state = GET_MODULE_STATE(query);
138 | PyObject *pattern_predicates = PyList_GetItem(query->predicates, match.pattern_index);
139 | if (pattern_predicates == NULL) {
140 | return false;
141 | }
142 |
143 | // check if all predicates are satisfied
144 | bool is_satisfied = true;
145 | for (size_t i = 0, l = (size_t)PyList_Size(pattern_predicates); is_satisfied && i < l; ++i) {
146 | PyObject *item = PyList_GetItem(pattern_predicates, i);
147 | if (IS_INSTANCE_OF(item, state->query_predicate_anyof_type)) {
148 | is_satisfied = satisfies_anyof(state, (QueryPredicateAnyOf *)item, &match, tree);
149 | } else if (IS_INSTANCE_OF(item, state->query_predicate_eq_capture_type)) {
150 | is_satisfied =
151 | satisfies_eq_capture(state, (QueryPredicateEqCapture *)item, &match, tree);
152 | } else if (IS_INSTANCE_OF(item, state->query_predicate_eq_string_type)) {
153 | is_satisfied = satisfies_eq_string(state, (QueryPredicateEqString *)item, &match, tree);
154 | } else if (IS_INSTANCE_OF(item, state->query_predicate_match_type)) {
155 | is_satisfied = satisfies_match(state, (QueryPredicateMatch *)item, &match, tree);
156 | } else if (callable != NULL) {
157 | PyObject *captures = captures_for_match(state, query->query, &match, tree);
158 | if (captures == NULL) {
159 | is_satisfied = false;
160 | break;
161 | }
162 | QueryPredicateGeneric *predicate = (QueryPredicateGeneric *)item;
163 | PyObject *result = PyObject_CallFunction(callable, "OOIO", predicate->predicate,
164 | predicate->arguments, i, captures);
165 | if (result == NULL) {
166 | is_satisfied = false;
167 | break;
168 | }
169 | is_satisfied = PyObject_IsTrue(result);
170 | Py_DECREF(result);
171 | }
172 | }
173 |
174 | return is_satisfied;
175 | }
176 |
177 | // QueryPredicateAnyOf {{{
178 |
179 | static void query_predicate_anyof_dealloc(QueryPredicateAnyOf *self) {
180 | Py_XDECREF(self->values);
181 | Py_TYPE(self)->tp_free(self);
182 | }
183 |
184 | static PyType_Slot query_predicate_anyof_slots[] = {
185 | {Py_tp_doc, ""},
186 | {Py_tp_dealloc, query_predicate_anyof_dealloc},
187 | {0, NULL},
188 | };
189 |
190 | PyType_Spec query_predicate_anyof_type_spec = {
191 | .name = "tree_sitter.QueryPredicateAnyOf",
192 | .basicsize = sizeof(QueryPredicateAnyOf),
193 | .itemsize = 0,
194 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
195 | .slots = query_predicate_anyof_slots,
196 | };
197 |
198 | // }}}
199 |
200 | // QueryPredicateEqCapture {{{
201 |
202 | static void query_predicate_eq_capture_dealloc(QueryPredicateEqCapture *self) {
203 | Py_TYPE(self)->tp_free(self);
204 | }
205 |
206 | static PyType_Slot query_predicate_eq_capture_slots[] = {
207 | {Py_tp_doc, ""},
208 | {Py_tp_dealloc, query_predicate_eq_capture_dealloc},
209 | {0, NULL},
210 | };
211 |
212 | PyType_Spec query_predicate_eq_capture_type_spec = {
213 | .name = "tree_sitter.QueryPredicateEqCapture",
214 | .basicsize = sizeof(QueryPredicateEqCapture),
215 | .itemsize = 0,
216 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
217 | .slots = query_predicate_eq_capture_slots,
218 | };
219 |
220 | // }}}
221 |
222 | // QueryPredicateEqString {{{
223 |
224 | static void query_predicate_eq_string_dealloc(QueryPredicateEqString *self) {
225 | Py_XDECREF(self->string_value);
226 | Py_TYPE(self)->tp_free(self);
227 | }
228 |
229 | static PyType_Slot query_predicate_eq_string_slots[] = {
230 | {Py_tp_doc, ""},
231 | {Py_tp_dealloc, query_predicate_eq_string_dealloc},
232 | {0, NULL},
233 | };
234 |
235 | PyType_Spec query_predicate_eq_string_type_spec = {
236 | .name = "tree_sitter.QueryPredicateEqString",
237 | .basicsize = sizeof(QueryPredicateEqString),
238 | .itemsize = 0,
239 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
240 | .slots = query_predicate_eq_string_slots,
241 | };
242 |
243 | // }}}
244 |
245 | // QueryPredicateMatch {{{
246 |
247 | static void query_predicate_match_dealloc(QueryPredicateMatch *self) {
248 | Py_XDECREF(self->pattern);
249 | Py_TYPE(self)->tp_free(self);
250 | }
251 |
252 | static PyType_Slot query_predicate_match_slots[] = {
253 | {Py_tp_doc, ""},
254 | {Py_tp_dealloc, query_predicate_match_dealloc},
255 | {0, NULL},
256 | };
257 |
258 | PyType_Spec query_predicate_match_type_spec = {
259 | .name = "tree_sitter.QueryPredicateMatch",
260 | .basicsize = sizeof(QueryPredicateMatch),
261 | .itemsize = 0,
262 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
263 | .slots = query_predicate_match_slots,
264 | };
265 |
266 | // }}}
267 |
268 | // QueryPredicateGeneric {{{
269 |
270 | static void query_predicate_generic_dealloc(QueryPredicateGeneric *self) {
271 | Py_XDECREF(self->predicate);
272 | Py_XDECREF(self->arguments);
273 | Py_TYPE(self)->tp_free(self);
274 | }
275 |
276 | static PyType_Slot query_predicate_generic_slots[] = {
277 | {Py_tp_doc, ""},
278 | {Py_tp_dealloc, query_predicate_generic_dealloc},
279 | {0, NULL},
280 | };
281 |
282 | PyType_Spec query_predicate_generic_type_spec = {
283 | .name = "tree_sitter.QueryPredicateGeneric",
284 | .basicsize = sizeof(QueryPredicateGeneric),
285 | .itemsize = 0,
286 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
287 | .slots = query_predicate_generic_slots,
288 | };
289 |
290 | // }}}
291 |
--------------------------------------------------------------------------------
/tree_sitter/binding/range.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | int range_init(Range *self, PyObject *args, PyObject *kwargs) {
4 | uint32_t start_row, start_col, end_row, end_col, start_byte, end_byte;
5 | char *keywords[] = {
6 | "start_point", "end_point", "start_byte", "end_byte", NULL,
7 | };
8 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "(II)(II)II:__init__", keywords, &start_row,
9 | &start_col, &end_row, &end_col, &start_byte, &end_byte)) {
10 | return -1;
11 | }
12 |
13 | if (start_row > end_row || (start_row == end_row && start_col > end_col)) {
14 | PyErr_Format(PyExc_ValueError, "Invalid point range: (%u, %u) to (%u, %u)", start_row,
15 | start_col, end_row, end_col);
16 | return -1;
17 | }
18 |
19 | if (start_byte > end_byte) {
20 | PyErr_Format(PyExc_ValueError, "Invalid byte range: %u to %u", start_byte, end_byte);
21 | return -1;
22 | }
23 |
24 | self->range.start_point.row = start_row;
25 | self->range.start_point.column = start_col;
26 | self->range.end_point.row = end_row;
27 | self->range.end_point.column = end_col;
28 | self->range.start_byte = start_byte;
29 | self->range.end_byte = end_byte;
30 |
31 | return 0;
32 | }
33 |
34 | void range_dealloc(Range *self) { Py_TYPE(self)->tp_free(self); }
35 |
36 | PyObject *range_repr(Range *self) {
37 | const char *format_string =
38 | "";
39 | return PyUnicode_FromFormat(format_string, self->range.start_point.row,
40 | self->range.start_point.column, self->range.end_point.row,
41 | self->range.end_point.column, self->range.start_byte,
42 | self->range.end_byte);
43 | }
44 |
45 | Py_hash_t range_hash(Range *self) {
46 | // FIXME: replace with an efficient integer hashing algorithm
47 | PyObject *row_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_point.row),
48 | PyLong_FromLong(self->range.end_point.row));
49 | if (!row_tuple) {
50 | return -1;
51 | }
52 |
53 | PyObject *col_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_point.column),
54 | PyLong_FromSize_t(self->range.end_point.column));
55 | if (!col_tuple) {
56 | Py_DECREF(row_tuple);
57 | return -1;
58 | }
59 |
60 | PyObject *bytes_tuple = PyTuple_Pack(2, PyLong_FromSize_t(self->range.start_byte),
61 | PyLong_FromSize_t(self->range.end_byte));
62 | if (!bytes_tuple) {
63 | Py_DECREF(row_tuple);
64 | Py_DECREF(col_tuple);
65 | return -1;
66 | }
67 |
68 | PyObject *range_tuple = PyTuple_Pack(3, row_tuple, col_tuple, bytes_tuple);
69 | if (!range_tuple) {
70 | Py_DECREF(row_tuple);
71 | Py_DECREF(col_tuple);
72 | Py_DECREF(bytes_tuple);
73 | return -1;
74 | }
75 |
76 | Py_hash_t hash = PyObject_Hash(range_tuple);
77 |
78 | Py_DECREF(range_tuple);
79 | Py_DECREF(row_tuple);
80 | Py_DECREF(col_tuple);
81 | Py_DECREF(bytes_tuple);
82 | return hash;
83 | }
84 |
85 | PyObject *range_compare(Range *self, PyObject *other, int op) {
86 | if ((op != Py_EQ && op != Py_NE) || !IS_INSTANCE(other, range_type)) {
87 | Py_RETURN_NOTIMPLEMENTED;
88 | }
89 |
90 | Range *range = (Range *)other;
91 | bool result = ((self->range.start_point.row == range->range.start_point.row) &&
92 | (self->range.start_point.column == range->range.start_point.column) &&
93 | (self->range.start_byte == range->range.start_byte) &&
94 | (self->range.end_point.row == range->range.end_point.row) &&
95 | (self->range.end_point.column == range->range.end_point.column) &&
96 | (self->range.end_byte == range->range.end_byte));
97 | return PyBool_FromLong(result ^ (op == Py_NE));
98 | }
99 |
100 | PyObject *range_get_start_point(Range *self, void *Py_UNUSED(payload)) {
101 | return POINT_NEW(GET_MODULE_STATE(self), self->range.start_point);
102 | }
103 |
104 | PyObject *range_get_end_point(Range *self, void *Py_UNUSED(payload)) {
105 | return POINT_NEW(GET_MODULE_STATE(self), self->range.end_point);
106 | }
107 |
108 | PyObject *range_get_start_byte(Range *self, void *Py_UNUSED(payload)) {
109 | return PyLong_FromUnsignedLong(self->range.start_byte);
110 | }
111 |
112 | PyObject *range_get_end_byte(Range *self, void *Py_UNUSED(payload)) {
113 | return PyLong_FromUnsignedLong(self->range.end_byte);
114 | }
115 |
116 | static PyGetSetDef range_accessors[] = {
117 | {"start_point", (getter)range_get_start_point, NULL, PyDoc_STR("The start point."), NULL},
118 | {"start_byte", (getter)range_get_start_byte, NULL, PyDoc_STR("The start byte."), NULL},
119 | {"end_point", (getter)range_get_end_point, NULL, PyDoc_STR("The end point."), NULL},
120 | {"end_byte", (getter)range_get_end_byte, NULL, PyDoc_STR("The end byte."), NULL},
121 | {NULL},
122 | };
123 |
124 | static PyType_Slot range_type_slots[] = {
125 | {Py_tp_doc, PyDoc_STR("A range of positions in a multi-line text document, "
126 | "both in terms of bytes and of rows and columns.")},
127 | {Py_tp_init, range_init},
128 | {Py_tp_dealloc, range_dealloc},
129 | {Py_tp_repr, range_repr},
130 | {Py_tp_hash, range_hash},
131 | {Py_tp_richcompare, range_compare},
132 | {Py_tp_getset, range_accessors},
133 | {0, NULL},
134 | };
135 |
136 | PyType_Spec range_type_spec = {
137 | .name = "tree_sitter.Range",
138 | .basicsize = sizeof(Range),
139 | .itemsize = 0,
140 | .flags = Py_TPFLAGS_DEFAULT,
141 | .slots = range_type_slots,
142 | };
143 |
--------------------------------------------------------------------------------
/tree_sitter/binding/tree.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree);
4 |
5 | void tree_dealloc(Tree *self) {
6 | ts_tree_delete(self->tree);
7 | Py_XDECREF(self->source);
8 | Py_TYPE(self)->tp_free(self);
9 | }
10 |
11 | PyObject *tree_get_root_node(Tree *self, void *Py_UNUSED(payload)) {
12 | ModuleState *state = GET_MODULE_STATE(self);
13 | TSNode node = ts_tree_root_node(self->tree);
14 | return node_new_internal(state, node, (PyObject *)self);
15 | }
16 |
17 | PyObject *tree_root_node_with_offset(Tree *self, PyObject *args) {
18 | uint32_t offset_bytes;
19 | TSPoint offset_extent;
20 | if (!PyArg_ParseTuple(args, "I(II):root_node_with_offset", &offset_bytes, &offset_extent.row,
21 | &offset_extent.column)) {
22 | return NULL;
23 | }
24 |
25 | ModuleState *state = GET_MODULE_STATE(self);
26 | TSNode node = ts_tree_root_node_with_offset(self->tree, offset_bytes, offset_extent);
27 | if (ts_node_is_null(node)) {
28 | Py_RETURN_NONE;
29 | }
30 | return node_new_internal(state, node, (PyObject *)self);
31 | }
32 |
33 | PyObject *tree_walk(Tree *self, PyObject *Py_UNUSED(args)) {
34 | ModuleState *state = GET_MODULE_STATE(self);
35 | TreeCursor *tree_cursor = PyObject_New(TreeCursor, state->tree_cursor_type);
36 | if (tree_cursor == NULL) {
37 | return NULL;
38 | }
39 |
40 | tree_cursor->tree = Py_NewRef(self);
41 | tree_cursor->node = NULL;
42 | tree_cursor->cursor = ts_tree_cursor_new(ts_tree_root_node(self->tree));
43 | return PyObject_Init((PyObject *)tree_cursor, state->tree_cursor_type);
44 | }
45 |
46 | PyObject *tree_edit(Tree *self, PyObject *args, PyObject *kwargs) {
47 | unsigned start_byte, start_row, start_column;
48 | unsigned old_end_byte, old_end_row, old_end_column;
49 | unsigned new_end_byte, new_end_row, new_end_column;
50 |
51 | char *keywords[] = {
52 | "start_byte", "old_end_byte", "new_end_byte", "start_point",
53 | "old_end_point", "new_end_point", NULL,
54 | };
55 |
56 | int ok = PyArg_ParseTupleAndKeywords(
57 | args, kwargs, "III(II)(II)(II):edit", keywords, &start_byte, &old_end_byte, &new_end_byte,
58 | &start_row, &start_column, &old_end_row, &old_end_column, &new_end_row, &new_end_column);
59 |
60 | if (ok) {
61 | TSInputEdit edit = {
62 | .start_byte = start_byte,
63 | .old_end_byte = old_end_byte,
64 | .new_end_byte = new_end_byte,
65 | .start_point = {start_row, start_column},
66 | .old_end_point = {old_end_row, old_end_column},
67 | .new_end_point = {new_end_row, new_end_column},
68 | };
69 | ts_tree_edit(self->tree, &edit);
70 | Py_XDECREF(self->source);
71 | self->source = Py_None;
72 | Py_INCREF(self->source);
73 | }
74 | Py_RETURN_NONE;
75 | }
76 |
77 | PyObject *tree_copy(Tree *self, PyObject *Py_UNUSED(args)) {
78 | ModuleState *state = GET_MODULE_STATE(self);
79 | Tree *copied = PyObject_New(Tree, state->tree_type);
80 | if (copied == NULL) {
81 | return NULL;
82 | }
83 |
84 | copied->tree = ts_tree_copy(self->tree);
85 | return PyObject_Init((PyObject *)copied, state->tree_type);
86 | }
87 |
88 | PyObject *tree_print_dot_graph(Tree *self, PyObject *arg) {
89 | int fd = PyObject_AsFileDescriptor(arg);
90 | if (fd < 0) {
91 | return NULL;
92 | }
93 | Py_BEGIN_ALLOW_THREADS
94 | ts_tree_print_dot_graph(self->tree, fd);
95 | Py_END_ALLOW_THREADS
96 | Py_RETURN_NONE;
97 | }
98 |
99 | PyObject *tree_changed_ranges(Tree *self, PyObject *args, PyObject *kwargs) {
100 | ModuleState *state = GET_MODULE_STATE(self);
101 | PyObject *new_tree;
102 | char *keywords[] = {"new_tree", NULL};
103 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:changed_ranges", keywords, state->tree_type,
104 | &new_tree)) {
105 | return NULL;
106 | }
107 |
108 | uint32_t length = 0;
109 | TSTree *tree = ((Tree *)new_tree)->tree;
110 | TSRange *ranges = ts_tree_get_changed_ranges(self->tree, tree, &length);
111 |
112 | PyObject *result = PyList_New(length);
113 | if (result == NULL) {
114 | return NULL;
115 | }
116 | for (unsigned i = 0; i < length; ++i) {
117 | Range *range = PyObject_New(Range, state->range_type);
118 | if (range == NULL) {
119 | return NULL;
120 | }
121 | range->range = ranges[i];
122 | PyList_SetItem(result, i, PyObject_Init((PyObject *)range, state->range_type));
123 | }
124 |
125 | PyMem_Free(ranges);
126 | return result;
127 | }
128 |
129 | PyObject *tree_get_included_ranges(Tree *self, PyObject *Py_UNUSED(args)) {
130 | ModuleState *state = GET_MODULE_STATE(self);
131 | uint32_t length = 0;
132 | TSRange *ranges = ts_tree_included_ranges(self->tree, &length);
133 |
134 | PyObject *result = PyList_New(length);
135 | if (result == NULL) {
136 | return NULL;
137 | }
138 | for (unsigned i = 0; i < length; ++i) {
139 | Range *range = PyObject_New(Range, state->range_type);
140 | if (range == NULL) {
141 | return NULL;
142 | }
143 | range->range = ranges[i];
144 | PyList_SetItem(result, i, PyObject_Init((PyObject *)range, state->range_type));
145 | }
146 |
147 | PyMem_Free(ranges);
148 | return result;
149 | }
150 |
151 | PyObject *tree_get_language(Tree *self, PyObject *Py_UNUSED(args)) {
152 | return Py_NewRef(self->language);
153 | }
154 |
155 | PyDoc_STRVAR(tree_root_node_with_offset_doc,
156 | "root_node_with_offset(self, offset_bytes, offset_extent, /)\n--\n\n"
157 | "Get the root node of the syntax tree, but with its position shifted "
158 | "forward by the given offset.");
159 | PyDoc_STRVAR(tree_walk_doc, "walk(self, /)\n--\n\n"
160 | "Create a new :class:`TreeCursor` starting from the root of the tree.");
161 | PyDoc_STRVAR(tree_edit_doc,
162 | "edit(self, start_byte, old_end_byte, new_end_byte, start_point, old_end_point, "
163 | "new_end_point)\n--\n\n"
164 | "Edit the syntax tree to keep it in sync with source code that has been edited.\n\n"
165 | "You must describe the edit both in terms of byte offsets and of row/column points.");
166 | PyDoc_STRVAR(
167 | tree_changed_ranges_doc,
168 | "changed_ranges(self, /, new_tree)\n--\n\n"
169 | "Compare this old edited syntax tree to a new syntax tree representing the same document, "
170 | "returning a sequence of ranges whose syntactic structure has changed." DOC_RETURNS
171 | "Ranges where the hierarchical structure of syntax nodes (from root to leaf) has changed "
172 | "between the old and new trees. Characters outside these ranges have identical ancestor "
173 | "nodes in both trees." DOC_NOTE "The returned ranges may be slightly larger than the exact "
174 | "changed areas, but Tree-sitter attempts to make them as small as possible. " DOC_TIP
175 | "For this to work correctly, this syntax tree must have been edited such that its "
176 | "ranges match up to the new tree.\n\nGenerally, you'll want to call this method "
177 | "right after calling the :meth:`Parser.parse` method. Call it on the old tree that "
178 | "was passed to the method, and pass the new tree that was returned from it.");
179 | PyDoc_STRVAR(tree_print_dot_graph_doc,
180 | "print_dot_graph(self, /, file)\n--\n\n"
181 | "Write a DOT graph describing the syntax tree to the given file.");
182 | PyDoc_STRVAR(tree_copy_doc, "copy(self, /)\n--\n\n"
183 | "Create a shallow copy of the tree.");
184 | PyDoc_STRVAR(tree_copy2_doc, "__copy__(self, /)\n--\n\n"
185 | "Use :func:`copy.copy` to create a copy of the tree.");
186 |
187 | static PyMethodDef tree_methods[] = {
188 | {
189 | .ml_name = "root_node_with_offset",
190 | .ml_meth = (PyCFunction)tree_root_node_with_offset,
191 | .ml_flags = METH_VARARGS,
192 | .ml_doc = tree_root_node_with_offset_doc,
193 | },
194 | {
195 | .ml_name = "walk",
196 | .ml_meth = (PyCFunction)tree_walk,
197 | .ml_flags = METH_NOARGS,
198 | .ml_doc = tree_walk_doc,
199 | },
200 | {
201 | .ml_name = "edit",
202 | .ml_meth = (PyCFunction)tree_edit,
203 | .ml_flags = METH_KEYWORDS | METH_VARARGS,
204 | .ml_doc = tree_edit_doc,
205 | },
206 | {
207 | .ml_name = "changed_ranges",
208 | .ml_meth = (PyCFunction)tree_changed_ranges,
209 | .ml_flags = METH_KEYWORDS | METH_VARARGS,
210 | .ml_doc = tree_changed_ranges_doc,
211 | },
212 | {
213 | .ml_name = "print_dot_graph",
214 | .ml_meth = (PyCFunction)tree_print_dot_graph,
215 | .ml_flags = METH_O,
216 | .ml_doc = tree_print_dot_graph_doc,
217 | },
218 | {
219 | .ml_name = "copy",
220 | .ml_meth = (PyCFunction)tree_copy,
221 | .ml_flags = METH_NOARGS,
222 | .ml_doc = tree_copy_doc,
223 | },
224 | {.ml_name = "__copy__",
225 | .ml_meth = (PyCFunction)tree_copy,
226 | .ml_flags = METH_NOARGS,
227 | .ml_doc = tree_copy2_doc},
228 | {NULL},
229 | };
230 |
231 | static PyGetSetDef tree_accessors[] = {
232 | {"root_node", (getter)tree_get_root_node, NULL, PyDoc_STR("The root node of the syntax tree."),
233 | NULL},
234 | {"included_ranges", (getter)tree_get_included_ranges, NULL,
235 | PyDoc_STR("The included ranges that were used to parse the syntax tree."), NULL},
236 | {"language", (getter)tree_get_language, NULL,
237 | PyDoc_STR("The language that was used to parse the syntax tree."), NULL},
238 | {NULL},
239 | };
240 |
241 | static PyType_Slot tree_type_slots[] = {
242 | {Py_tp_doc, PyDoc_STR("A tree that represents the syntactic structure of a source code file.")},
243 | {Py_tp_new, NULL},
244 | {Py_tp_dealloc, tree_dealloc},
245 | {Py_tp_methods, tree_methods},
246 | {Py_tp_getset, tree_accessors},
247 | {0, NULL},
248 | };
249 |
250 | PyType_Spec tree_type_spec = {
251 | .name = "tree_sitter.Tree",
252 | .basicsize = sizeof(Tree),
253 | .itemsize = 0,
254 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
255 | .slots = tree_type_slots,
256 | };
257 |
--------------------------------------------------------------------------------
/tree_sitter/binding/tree_cursor.c:
--------------------------------------------------------------------------------
1 | #include "types.h"
2 |
3 | PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree);
4 |
5 | void tree_cursor_dealloc(TreeCursor *self) {
6 | ts_tree_cursor_delete(&self->cursor);
7 | Py_XDECREF(self->node);
8 | Py_XDECREF(self->tree);
9 | Py_TYPE(self)->tp_free(self);
10 | }
11 |
12 | PyObject *tree_cursor_get_node(TreeCursor *self, void *Py_UNUSED(payload)) {
13 | if (self->node == NULL) {
14 | TSNode current_node = ts_tree_cursor_current_node(&self->cursor);
15 | if (ts_node_is_null(current_node)) {
16 | Py_RETURN_NONE;
17 | }
18 | ModuleState *state = GET_MODULE_STATE(self);
19 | self->node = node_new_internal(state, current_node, self->tree);
20 | }
21 | return Py_NewRef(self->node);
22 | }
23 |
24 | PyObject *tree_cursor_get_field_id(TreeCursor *self, void *Py_UNUSED(payload)) {
25 | TSFieldId field_id = ts_tree_cursor_current_field_id(&self->cursor);
26 | if (field_id == 0) {
27 | Py_RETURN_NONE;
28 | }
29 | return PyLong_FromUnsignedLong(field_id);
30 | }
31 |
32 | PyObject *tree_cursor_get_field_name(TreeCursor *self, void *Py_UNUSED(payload)) {
33 | const char *field_name = ts_tree_cursor_current_field_name(&self->cursor);
34 | if (field_name == NULL) {
35 | Py_RETURN_NONE;
36 | }
37 | return PyUnicode_FromString(field_name);
38 | }
39 |
40 | PyObject *tree_cursor_get_depth(TreeCursor *self, void *Py_UNUSED(args)) {
41 | uint32_t depth = ts_tree_cursor_current_depth(&self->cursor);
42 | return PyLong_FromUnsignedLong(depth);
43 | }
44 |
45 | PyObject *tree_cursor_get_descendant_index(TreeCursor *self, void *Py_UNUSED(payload)) {
46 | uint32_t index = ts_tree_cursor_current_descendant_index(&self->cursor);
47 | return PyLong_FromUnsignedLong(index);
48 | }
49 |
50 | PyObject *tree_cursor_goto_first_child(TreeCursor *self, PyObject *Py_UNUSED(args)) {
51 | bool result = ts_tree_cursor_goto_first_child(&self->cursor);
52 | if (result) {
53 | Py_XDECREF(self->node);
54 | self->node = NULL;
55 | }
56 | return PyBool_FromLong(result);
57 | }
58 |
59 | PyObject *tree_cursor_goto_last_child(TreeCursor *self, PyObject *Py_UNUSED(args)) {
60 | bool result = ts_tree_cursor_goto_last_child(&self->cursor);
61 | if (result) {
62 | Py_XDECREF(self->node);
63 | self->node = NULL;
64 | }
65 | return PyBool_FromLong(result);
66 | }
67 |
68 | PyObject *tree_cursor_goto_parent(TreeCursor *self, PyObject *Py_UNUSED(args)) {
69 | bool result = ts_tree_cursor_goto_parent(&self->cursor);
70 | if (result) {
71 | Py_XDECREF(self->node);
72 | self->node = NULL;
73 | }
74 | return PyBool_FromLong(result);
75 | }
76 |
77 | PyObject *tree_cursor_goto_next_sibling(TreeCursor *self, PyObject *Py_UNUSED(args)) {
78 | bool result = ts_tree_cursor_goto_next_sibling(&self->cursor);
79 | if (result) {
80 | Py_XDECREF(self->node);
81 | self->node = NULL;
82 | }
83 | return PyBool_FromLong(result);
84 | }
85 |
86 | PyObject *tree_cursor_goto_previous_sibling(TreeCursor *self, PyObject *Py_UNUSED(args)) {
87 | bool result = ts_tree_cursor_goto_previous_sibling(&self->cursor);
88 | if (result) {
89 | Py_XDECREF(self->node);
90 | self->node = NULL;
91 | }
92 | return PyBool_FromLong(result);
93 | }
94 |
95 | PyObject *tree_cursor_goto_descendant(TreeCursor *self, PyObject *args) {
96 | uint32_t index;
97 | if (!PyArg_ParseTuple(args, "I:goto_descendant", &index)) {
98 | return NULL;
99 | }
100 | ts_tree_cursor_goto_descendant(&self->cursor, index);
101 | Py_XDECREF(self->node);
102 | self->node = NULL;
103 | Py_RETURN_NONE;
104 | }
105 |
106 | PyObject *tree_cursor_goto_first_child_for_byte(TreeCursor *self, PyObject *args) {
107 | uint32_t byte;
108 | if (!PyArg_ParseTuple(args, "I:goto_first_child_for_byte", &byte)) {
109 | return NULL;
110 | }
111 |
112 | int64_t result = ts_tree_cursor_goto_first_child_for_byte(&self->cursor, byte);
113 | if (result == -1) {
114 | Py_RETURN_NONE;
115 | }
116 | Py_XDECREF(self->node);
117 | self->node = NULL;
118 | return PyLong_FromUnsignedLong((uint32_t)result);
119 | }
120 |
121 | PyObject *tree_cursor_goto_first_child_for_point(TreeCursor *self, PyObject *args) {
122 | TSPoint point;
123 | if (!PyArg_ParseTuple(args, "(II):goto_first_child_for_point", &point.row, &point.column)) {
124 | return NULL;
125 | }
126 |
127 | int64_t result = ts_tree_cursor_goto_first_child_for_point(&self->cursor, point);
128 | if (result == -1) {
129 | Py_RETURN_NONE;
130 | }
131 | Py_XDECREF(self->node);
132 | self->node = NULL;
133 | return PyLong_FromUnsignedLong((uint32_t)result);
134 | }
135 |
136 | PyObject *tree_cursor_reset(TreeCursor *self, PyObject *args) {
137 | ModuleState *state = GET_MODULE_STATE(self);
138 | PyObject *node_obj;
139 | if (!PyArg_ParseTuple(args, "O!:reset", state->node_type, &node_obj)) {
140 | return NULL;
141 | }
142 |
143 | Node *node = (Node *)node_obj;
144 | ts_tree_cursor_reset(&self->cursor, node->node);
145 | Py_XDECREF(self->node);
146 | self->node = NULL;
147 | Py_RETURN_NONE;
148 | }
149 |
150 | PyObject *tree_cursor_reset_to(TreeCursor *self, PyObject *args) {
151 | ModuleState *state = GET_MODULE_STATE(self);
152 | PyObject *cursor_obj;
153 | if (!PyArg_ParseTuple(args, "O!:reset_to", state->tree_cursor_type, &cursor_obj)) {
154 | return NULL;
155 | }
156 |
157 | TreeCursor *cursor = (TreeCursor *)cursor_obj;
158 | ts_tree_cursor_reset_to(&self->cursor, &cursor->cursor);
159 | Py_XDECREF(self->node);
160 | self->node = NULL;
161 | Py_RETURN_NONE;
162 | }
163 |
164 | PyObject *tree_cursor_copy(TreeCursor *self, PyObject *Py_UNUSED(args)) {
165 | ModuleState *state = GET_MODULE_STATE(self);
166 | TreeCursor *copied = PyObject_New(TreeCursor, state->tree_cursor_type);
167 | if (copied == NULL) {
168 | return NULL;
169 | }
170 |
171 | copied->tree = Py_NewRef(self->tree);
172 | copied->cursor = ts_tree_cursor_copy(&self->cursor);
173 | return PyObject_Init((PyObject *)copied, state->tree_cursor_type);
174 | }
175 |
176 | PyDoc_STRVAR(tree_cursor_goto_first_child_doc,
177 | "goto_first_child(self, /)\n--\n\n"
178 | "Move this cursor to the first child of its current node." DOC_RETURNS "``True`` "
179 | "if the cursor successfully moved, or ``False`` if there were no children.");
180 | PyDoc_STRVAR(tree_cursor_goto_last_child_doc,
181 | "goto_last_child(self, /)\n--\n\n"
182 | "Move this cursor to the last child of its current node." DOC_RETURNS "``True`` "
183 | "if the cursor successfully moved, or ``False`` if there were no children." DOC_CAUTION
184 | "This method may be slower than :meth:`goto_first_child` because it needs "
185 | "to iterate through all the children to compute the child's position.");
186 | PyDoc_STRVAR(tree_cursor_goto_parent_doc,
187 | "goto_parent(self, /)\n--\n\n"
188 | "Move this cursor to the parent of its current node." DOC_RETURNS "``True`` "
189 | "if the cursor successfully moved, or ``False`` if there was no parent node "
190 | "(i.e. the cursor was already on the root node).");
191 | PyDoc_STRVAR(tree_cursor_goto_next_sibling_doc,
192 | "goto_next_sibling(self, /)\n--\n\n"
193 | "Move this cursor to the next sibling of its current node." DOC_RETURNS "``True`` "
194 | "if the cursor successfully moved, or ``False`` if there was no next sibling.");
195 | PyDoc_STRVAR(tree_cursor_goto_previous_sibling_doc,
196 | "goto_previous_sibling(self, /)\n--\n\n"
197 | "Move this cursor to the previous sibling of its current node." DOC_RETURNS
198 | "``True`` if the cursor successfully moved, or ``False`` if there was no previous "
199 | "sibling." DOC_CAUTION
200 | "This method may be slower than :meth:`goto_next_sibling` due to how node positions "
201 | "are stored.\nIn the worst case, this will need to iterate through all the children "
202 | "up to the previous sibling node to recalculate its position.");
203 | PyDoc_STRVAR(
204 | tree_cursor_goto_descendant_doc,
205 | "goto_descendant(self, index, /)\n--\n\n"
206 | "Move the cursor to the node that is the n-th descendant of the original node that the "
207 | "cursor was constructed with, where ``0`` represents the original node itself.");
208 | PyDoc_STRVAR(tree_cursor_goto_first_child_for_byte_doc,
209 | "goto_first_child_for_byte(self, byte, /)\n--\n\n"
210 | "Move this cursor to the first child of its current node that contains or starts "
211 | "after the given byte offset." DOC_RETURNS
212 | "The index of the child node if it was found, ``None`` otherwise.");
213 | PyDoc_STRVAR(tree_cursor_goto_first_child_for_point_doc,
214 | "goto_first_child_for_point(self, point, /)\n--\n\n"
215 | "Move this cursor to the first child of its current node that contains or starts "
216 | "after the given given row/column point." DOC_RETURNS
217 | "The index of the child node if it was found, ``None`` otherwise.");
218 | PyDoc_STRVAR(tree_cursor_reset_doc, "reset(self, node, /)\n--\n\n"
219 | "Re-initialize the cursor to start at the original node "
220 | "that it was constructed with.");
221 | PyDoc_STRVAR(tree_cursor_reset_to_doc,
222 | "reset_to(self, cursor, /)\n--\n\n"
223 | "Re-initialize the cursor to the same position as another cursor.\n\n"
224 | "Unlike :meth:`reset`, this will not lose parent information and allows reusing "
225 | "already created cursors.");
226 | PyDoc_STRVAR(tree_cursor_copy_doc, "copy(self, /)\n--\n\n"
227 | "Create an independent copy of the cursor.");
228 | PyDoc_STRVAR(tree_cursor_copy2_doc, "__copy__(self, /)\n--\n\n"
229 | "Use :func:`copy.copy` to create a copy of the cursor.");
230 |
231 | static PyMethodDef tree_cursor_methods[] = {
232 | {
233 | .ml_name = "goto_first_child",
234 | .ml_meth = (PyCFunction)tree_cursor_goto_first_child,
235 | .ml_flags = METH_NOARGS,
236 | .ml_doc = tree_cursor_goto_first_child_doc,
237 | },
238 | {
239 | .ml_name = "goto_last_child",
240 | .ml_meth = (PyCFunction)tree_cursor_goto_last_child,
241 | .ml_flags = METH_NOARGS,
242 | .ml_doc = tree_cursor_goto_last_child_doc,
243 | },
244 | {
245 | .ml_name = "goto_parent",
246 | .ml_meth = (PyCFunction)tree_cursor_goto_parent,
247 | .ml_flags = METH_NOARGS,
248 | .ml_doc = tree_cursor_goto_parent_doc,
249 | },
250 | {
251 | .ml_name = "goto_next_sibling",
252 | .ml_meth = (PyCFunction)tree_cursor_goto_next_sibling,
253 | .ml_flags = METH_NOARGS,
254 | .ml_doc = tree_cursor_goto_next_sibling_doc,
255 | },
256 | {
257 | .ml_name = "goto_previous_sibling",
258 | .ml_meth = (PyCFunction)tree_cursor_goto_previous_sibling,
259 | .ml_flags = METH_NOARGS,
260 | .ml_doc = tree_cursor_goto_previous_sibling_doc,
261 | },
262 | {
263 | .ml_name = "goto_descendant",
264 | .ml_meth = (PyCFunction)tree_cursor_goto_descendant,
265 | .ml_flags = METH_VARARGS,
266 | .ml_doc = tree_cursor_goto_descendant_doc,
267 | },
268 | {
269 | .ml_name = "goto_first_child_for_byte",
270 | .ml_meth = (PyCFunction)tree_cursor_goto_first_child_for_byte,
271 | .ml_flags = METH_VARARGS,
272 | .ml_doc = tree_cursor_goto_first_child_for_byte_doc,
273 | },
274 | {
275 | .ml_name = "goto_first_child_for_point",
276 | .ml_meth = (PyCFunction)tree_cursor_goto_first_child_for_point,
277 | .ml_flags = METH_VARARGS,
278 | .ml_doc = tree_cursor_goto_first_child_for_point_doc,
279 | },
280 | {
281 | .ml_name = "reset",
282 | .ml_meth = (PyCFunction)tree_cursor_reset,
283 | .ml_flags = METH_VARARGS,
284 | .ml_doc = tree_cursor_reset_doc,
285 | },
286 | {
287 | .ml_name = "reset_to",
288 | .ml_meth = (PyCFunction)tree_cursor_reset_to,
289 | .ml_flags = METH_VARARGS,
290 | .ml_doc = tree_cursor_reset_to_doc,
291 | },
292 | {
293 | .ml_name = "copy",
294 | .ml_meth = (PyCFunction)tree_cursor_copy,
295 | .ml_flags = METH_NOARGS,
296 | .ml_doc = tree_cursor_copy_doc,
297 | },
298 | {.ml_name = "__copy__",
299 | .ml_meth = (PyCFunction)tree_cursor_copy,
300 | .ml_flags = METH_NOARGS,
301 | .ml_doc = tree_cursor_copy2_doc},
302 | {NULL},
303 | };
304 |
305 | static PyGetSetDef tree_cursor_accessors[] = {
306 | {"node", (getter)tree_cursor_get_node, NULL, "The current node.", NULL},
307 | {"descendant_index", (getter)tree_cursor_get_descendant_index, NULL,
308 | PyDoc_STR("The index of the cursor's current node out of all of the descendants of the "
309 | "original node that the cursor was constructed with.\n\n"),
310 | NULL},
311 | {"field_id", (getter)tree_cursor_get_field_id, NULL,
312 | PyDoc_STR("The numerical field id of this tree cursor's current node, if available."), NULL},
313 | {"field_name", (getter)tree_cursor_get_field_name, NULL,
314 | PyDoc_STR("The field name of this tree cursor's current node, if available."), NULL},
315 | {"depth", (getter)tree_cursor_get_depth, NULL,
316 | PyDoc_STR("The depth of the cursor's current node relative to the original node that it was "
317 | "constructed with."),
318 | NULL},
319 | {NULL},
320 | };
321 |
322 | static PyType_Slot tree_cursor_type_slots[] = {
323 | {Py_tp_doc,
324 | PyDoc_STR("A class for walking a syntax :class:`Tree` efficiently." DOC_IMPORTANT
325 | "The cursor can only walk into children of the node it was constructed with.")},
326 | {Py_tp_new, NULL},
327 | {Py_tp_dealloc, tree_cursor_dealloc},
328 | {Py_tp_methods, tree_cursor_methods},
329 | {Py_tp_getset, tree_cursor_accessors},
330 | {0, NULL},
331 | };
332 |
333 | PyType_Spec tree_cursor_type_spec = {
334 | .name = "tree_sitter.TreeCursor",
335 | .basicsize = sizeof(TreeCursor),
336 | .itemsize = 0,
337 | .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
338 | .slots = tree_cursor_type_slots,
339 | };
340 |
--------------------------------------------------------------------------------
/tree_sitter/binding/types.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "tree_sitter/api.h"
4 |
5 | #include <Python.h>
6 |
7 | // Types
8 |
// Python object wrapping a tree-sitter node.
typedef struct {
    PyObject_HEAD
    TSNode node;        // the wrapped tree-sitter node value
    PyObject *children; // NOTE(review): presumably a cache of the child Node objects - confirm in node.c
    PyObject *tree;     // Python object of the tree the node was created for (see node_new_internal callers)
} Node;
15 |
// Python object wrapping a tree-sitter syntax tree.
typedef struct {
    PyObject_HEAD
    TSTree *tree;       // the native syntax tree
    PyObject *source;   // NOTE(review): presumably the source the tree was parsed from - confirm in parser.c
    PyObject *language; // language object handed out by the ``language`` property
} Tree;
22 |
// Python object wrapping a tree-sitter language.
typedef struct {
    PyObject_HEAD
    TSLanguage *language; // the native language handle
    uint32_t abi_version; // NOTE(review): presumably the ABI version reported by the language - confirm
    const char *name;     // NOTE(review): presumably the language name; ownership not visible here
} Language;
29 |
// Python object wrapping a tree-sitter parser.
typedef struct {
    PyObject_HEAD
    TSParser *parser;   // the native parser
    PyObject *language; // NOTE(review): presumably the Language object currently assigned - confirm
    PyObject *logger;   // NOTE(review): presumably the Python logging callback, if any - confirm
} Parser;
36 |
// Python object wrapping a tree-sitter tree cursor.
typedef struct {
    PyObject_HEAD
    TSTreeCursor cursor; // the native cursor, stored by value
    PyObject *node;      // cached Node for the current position; NULL until requested, cleared on movement
    PyObject *tree;      // tree object passed along when Node objects are created for this cursor
} TreeCursor;
43 |
// Query predicate object referring to two captures.
// NOTE(review): presumably backs the ``#eq?`` family comparing two captures - confirm in query_predicates.c.
typedef struct {
    PyObject_HEAD
    uint32_t capture1_id; // id of the first capture
    uint32_t capture2_id; // id of the second capture
    bool is_positive;     // NOTE(review): presumably false for the negated (``not-``) form - confirm
    bool is_any;          // NOTE(review): presumably true for the ``any-`` form - confirm
} QueryPredicateEqCapture;
51 |
// Query predicate object referring to one capture and a string value.
// NOTE(review): presumably backs the ``#eq?`` family comparing a capture to a string - confirm.
typedef struct {
    PyObject_HEAD
    uint32_t capture_id;    // id of the capture
    PyObject *string_value; // Python object holding the string to compare with
    bool is_positive;       // NOTE(review): presumably false for the negated (``not-``) form - confirm
    bool is_any;            // NOTE(review): presumably true for the ``any-`` form - confirm
} QueryPredicateEqString;
59 |
// Query predicate object referring to one capture and a pattern.
// NOTE(review): presumably backs the ``#match?`` family - confirm in query_predicates.c.
typedef struct {
    PyObject_HEAD
    uint32_t capture_id; // id of the capture
    PyObject *pattern;   // NOTE(review): presumably a compiled regular expression object - confirm
    bool is_positive;    // NOTE(review): presumably false for the negated (``not-``) form - confirm
    bool is_any;         // NOTE(review): presumably true for the ``any-`` form - confirm
} QueryPredicateMatch;
67 |
// Query predicate object referring to one capture and a collection of values.
// NOTE(review): presumably backs the ``#any-of?`` family - confirm in query_predicates.c.
typedef struct {
    PyObject_HEAD
    uint32_t capture_id; // id of the capture
    PyObject *values;    // Python object holding the candidate values
    bool is_positive;    // NOTE(review): presumably false for the negated (``not-``) form - confirm
} QueryPredicateAnyOf;
74 |
// Query predicate object storing a predicate together with its arguments.
// NOTE(review): presumably used for predicates without a dedicated type - confirm.
typedef struct {
    PyObject_HEAD
    PyObject *predicate; // Python object identifying the predicate
    PyObject *arguments; // Python object holding the predicate's arguments
} QueryPredicateGeneric;
80 |
// Python object wrapping a tree-sitter query.
typedef struct {
    PyObject_HEAD
    TSQuery *query;       // the native query
    PyObject *predicates; // Python-side data for the query's predicates (layout defined in query.c)
    PyObject *settings;   // Python-side data for the query's settings (layout defined in query.c)
    PyObject *assertions; // Python-side data for the query's assertions (layout defined in query.c)
} Query;
88 |
// Python object wrapping a tree-sitter query cursor.
typedef struct {
    PyObject_HEAD
    TSQueryCursor *cursor; // the native query cursor
    PyObject *query;       // NOTE(review): presumably the Query object the cursor executes - confirm
} QueryCursor;
94 |
// Python object wrapping a tree-sitter range.
typedef struct {
    PyObject_HEAD
    TSRange range; // the wrapped range, stored by value
} Range;
99 |
// Python object wrapping a tree-sitter lookahead iterator.
typedef struct {
    PyObject_HEAD
    TSLookaheadIterator *lookahead_iterator; // the native iterator
    PyObject *language;                      // NOTE(review): presumably the Language object it iterates for - confirm
} LookaheadIterator;
105 |
// Per-module state, retrieved from an object through GET_MODULE_STATE().
// Holds the type objects of the module plus a few shared helpers.
typedef struct {
    TSTreeCursor default_cursor; // NOTE(review): presumably a shared cursor reused internally - confirm in node.c
    PyObject *re_compile;        // NOTE(review): presumably a reference to ``re.compile`` - confirm in module.c
    PyObject *query_error;       // NOTE(review): presumably the ``QueryError`` exception type - confirm in module.c
    // Type objects, one per class of the module.
    PyTypeObject *language_type;
    PyTypeObject *log_type_type;
    PyTypeObject *lookahead_iterator_type;
    PyTypeObject *node_type;
    PyTypeObject *parser_type;
    PyTypeObject *point_type;
    PyTypeObject *query_cursor_type;
    PyTypeObject *query_predicate_anyof_type;
    PyTypeObject *query_predicate_eq_capture_type;
    PyTypeObject *query_predicate_eq_string_type;
    PyTypeObject *query_predicate_generic_type;
    PyTypeObject *query_predicate_match_type;
    PyTypeObject *query_type;
    PyTypeObject *range_type;
    PyTypeObject *tree_cursor_type;
    PyTypeObject *tree_type;
} ModuleState;
127 |
128 | // Macros
129 |
// Fetch the ModuleState through the type of ``obj``.
#define GET_MODULE_STATE(obj) ((ModuleState *)PyType_GetModuleState(Py_TYPE(obj)))

// Thin wrapper around PyObject_IsInstance() that casts ``type`` to ``PyObject *``.
#define IS_INSTANCE_OF(obj, type) PyObject_IsInstance((obj), (PyObject *)(type))

// Like IS_INSTANCE_OF, but ``type_name`` is the name of a ModuleState field.
// Requires a variable named ``self`` to be in scope at the place of use.
#define IS_INSTANCE(obj, type_name) IS_INSTANCE_OF(obj, GET_MODULE_STATE(self)->type_name)

// Create a point object by calling the module's point type with (row, column).
#define POINT_NEW(state, point)                                                                    \
    PyObject_CallFunction((PyObject *)(state)->point_type, "II", (point).row, (point).column)

// Issue a DeprecationWarning with the given message (stack level 1).
#define DEPRECATE(msg) PyErr_WarnEx(PyExc_DeprecationWarning, msg, 1)

// Deprecation warning telling the user to use ``new`` instead of ``old``.
// Both arguments must be string literals because they are concatenated at compile time.
#define REPLACE(old, new) DEPRECATE(old " is deprecated. Use " new " instead.")
142 |
143 | // Docstrings
144 |
// Section headings for docstrings. Each expands to a string literal made of a
// blank line, the heading, and an underline of dashes as long as the heading;
// they are spliced into docstrings through string literal concatenation.
#define DOC_ATTENTION "\n\nAttention\n---------\n"
#define DOC_CAUTION "\n\nCaution\n-------\n"
#define DOC_EXAMPLES "\n\nExamples\n--------\n"
#define DOC_IMPORTANT "\n\nImportant\n---------\n"
#define DOC_NOTE "\n\nNote\n----\n"
#define DOC_PARAMETERS "\n\nParameters\n----------\n"
#define DOC_RAISES "\n\nRaises\n------\n"
#define DOC_RETURNS "\n\nReturns\n-------\n"
#define DOC_SEE_ALSO "\n\nSee Also\n--------\n"
#define DOC_HINT "\n\nHint\n----\n"
#define DOC_TIP "\n\nTip\n---\n"
156 |
--------------------------------------------------------------------------------
/tree_sitter/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tree-sitter/py-tree-sitter/52c190d29c67ab84bf71b3e1e873138cc2146f8a/tree_sitter/py.typed
--------------------------------------------------------------------------------