├── .editorconfig ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml └── workflows │ ├── ci.yml │ ├── fuzz.yml │ ├── lint.yml │ └── publish.yml ├── .gitignore ├── CMakeLists.txt ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── Package.resolved ├── Package.swift ├── README.md ├── binding.gyp ├── bindings ├── c │ ├── tree-sitter-python.h │ └── tree-sitter-python.pc.in ├── go │ ├── binding.go │ └── binding_test.go ├── node │ ├── binding.cc │ ├── binding_test.js │ ├── index.d.ts │ └── index.js ├── python │ ├── tests │ │ └── test_binding.py │ └── tree_sitter_python │ │ ├── __init__.py │ │ ├── __init__.pyi │ │ ├── binding.c │ │ └── py.typed ├── rust │ ├── build.rs │ └── lib.rs └── swift │ ├── TreeSitterPython │ └── python.h │ └── TreeSitterPythonTests │ └── TreeSitterPythonTests.swift ├── eslint.config.mjs ├── examples ├── compound-statement-without-trailing-newline.py ├── crlf-line-endings.py ├── mixed-spaces-tabs.py ├── multiple-newlines.py ├── python2-grammar-crlf.py ├── python2-grammar.py ├── python3-grammar-crlf.py ├── python3-grammar.py ├── python3.8_grammar.py ├── simple-statements-without-trailing-newline.py ├── tabs.py └── trailing-whitespace.py ├── go.mod ├── go.sum ├── grammar.js ├── package-lock.json ├── package.json ├── pyproject.toml ├── queries ├── highlights.scm └── tags.scm ├── setup.py ├── src ├── grammar.json ├── node-types.json ├── parser.c ├── scanner.c └── tree_sitter │ ├── alloc.h │ ├── array.h │ └── parser.h ├── test ├── corpus │ ├── errors.txt │ ├── expressions.txt │ ├── literals.txt │ ├── pattern_matching.txt │ └── statements.txt ├── highlight │ ├── keywords.py │ ├── parameters.py │ └── pattern_matching.py └── tags │ └── main.py └── tree-sitter.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | 6 | [*.{json,toml,yml,gyp}] 7 | indent_style = space 8 | 
indent_size = 2 9 | 10 | [*.js] 11 | indent_style = space 12 | indent_size = 2 13 | 14 | [*.scm] 15 | indent_style = space 16 | indent_size = 2 17 | 18 | [*.{c,cc,h}] 19 | indent_style = space 20 | indent_size = 4 21 | 22 | [*.rs] 23 | indent_style = space 24 | indent_size = 4 25 | 26 | [*.{py,pyi}] 27 | indent_style = space 28 | indent_size = 4 29 | 30 | [*.swift] 31 | indent_style = space 32 | indent_size = 4 33 | 34 | [*.go] 35 | indent_style = tab 36 | indent_size = 8 37 | 38 | [Makefile] 39 | indent_style = tab 40 | indent_size = 8 41 | 42 | [parser.c] 43 | indent_size = 2 44 | 45 | [{alloc,array,parser}.h] 46 | indent_size = 2 47 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | # Generated source files 4 | src/*.json linguist-generated 5 | src/parser.c linguist-generated 6 | src/tree_sitter/* linguist-generated 7 | 8 | # C bindings 9 | bindings/c/* linguist-generated 10 | CMakeLists.txt linguist-generated 11 | Makefile linguist-generated 12 | 13 | # Rust bindings 14 | bindings/rust/* linguist-generated 15 | Cargo.toml linguist-generated 16 | Cargo.lock linguist-generated 17 | 18 | # Node.js bindings 19 | bindings/node/* linguist-generated 20 | binding.gyp linguist-generated 21 | package.json linguist-generated 22 | package-lock.json linguist-generated 23 | 24 | # Python bindings 25 | bindings/python/** linguist-generated 26 | setup.py linguist-generated 27 | pyproject.toml linguist-generated 28 | 29 | # Go bindings 30 | bindings/go/* linguist-generated 31 | go.mod linguist-generated 32 | go.sum linguist-generated 33 | 34 | # Swift bindings 35 | bindings/swift/** linguist-generated 36 | Package.swift linguist-generated 37 | Package.resolved linguist-generated 38 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: 
-------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: tree-sitter 4 | patreon: # Replace with a single Patreon username 5 | open_collective: tree-sitter # Replace with a single Open Collective username 6 | ko_fi: amaanq 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug or issue 3 | title: "bug: " 4 | labels: [bug] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Before** reporting an issue, make sure to search [existing issues](https://github.com/tree-sitter/tree-sitter-python/issues). Usage questions such as ***"How do I...?"*** either belong in [Discussions](https://github.com/tree-sitter/tree-sitter/discussions) upstream or in our [Discord server](https://discord.gg/w7nTvsVJhm) and will be closed. 10 | If your issue is related to a bug in your editor-experience because your editor *leverages* tree-sitter and this parser, then it is likely your issue does *NOT* belong here and belongs in the relevant editor's repository. 
11 | - type: checkboxes 12 | attributes: 13 | label: Did you check existing issues? 14 | description: Make sure you've checked all of the below before submitting an issue 15 | options: 16 | - label: I have read all the [tree-sitter docs](https://tree-sitter.github.io/tree-sitter/using-parsers) if it relates to using the parser 17 | required: false 18 | - label: I have searched the existing issues of tree-sitter-python 19 | required: true 20 | - type: input 21 | attributes: 22 | label: "Tree-Sitter CLI Version, if relevant (output of `tree-sitter --version`)" 23 | placeholder: "tree-sitter 0.20.8 (6bbb50bef8249e6460e7d69e42cc8146622fa4fd)" 24 | validations: 25 | required: false 26 | - type: textarea 27 | attributes: 28 | label: Describe the bug 29 | description: A clear and concise description of what the bug is. Please include any related errors you see such as parsing errors or tree-sitter cli errors. 30 | validations: 31 | required: true 32 | - type: textarea 33 | attributes: 34 | label: Steps To Reproduce/Bad Parse Tree 35 | description: Steps to reproduce the behavior. If you have a bad parse tree, please include it here. You can get this by running `tree-sitter parse <path-to-file>` and copying the output. 36 | placeholder: | 37 | 1. 38 | 2. 39 | 3. 40 | validations: 41 | required: true 42 | - type: textarea 43 | attributes: 44 | label: Expected Behavior/Parse Tree 45 | description: A concise description of what you expected to happen, or in the case of a bad parse tree, the expected parse tree. 46 | validations: 47 | required: true 48 | - type: textarea 49 | attributes: 50 | label: Repro 51 | description: Minimal code to reproduce this issue. Ideally this should be reproducible with the C library or the tree-sitter cli, do not suggest an editor or external tool. 52 | value: | 53 | # Example code that causes the issue 54 | def foo(): 55 | # Code that fails to parse, or causes an error 56 | ... 
57 | render: Python 58 | validations: 59 | required: false 60 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest a new feature 3 | title: "feature: " 4 | labels: [enhancement] 5 | body: 6 | - type: checkboxes 7 | attributes: 8 | label: Did you check the tree-sitter docs? 9 | description: Make sure you read all the docs before submitting a feature request 10 | options: 11 | - label: I have read all the [tree-sitter docs](https://tree-sitter.github.io/tree-sitter/using-parsers) if it relates to using the parser 12 | required: false 13 | - type: textarea 14 | validations: 15 | required: true 16 | attributes: 17 | label: Is your feature request related to a problem? Please describe. 18 | description: A clear and concise description of what the problem is. Ex. I think the grammar models this rule incorrectly and can be improved, or the scanner can be improved by doing [...], or Python has officially added a new feature that should be added to the grammar. 19 | - type: textarea 20 | validations: 21 | required: true 22 | attributes: 23 | label: Describe the solution you'd like 24 | description: A clear and concise description of what you want to happen. 25 | - type: textarea 26 | validations: 27 | required: true 28 | attributes: 29 | label: Describe alternatives you've considered 30 | description: A clear and concise description of any alternative solutions or features you've considered. 
31 | - type: textarea 32 | validations: 33 | required: false 34 | attributes: 35 | label: Additional context 36 | description: Add any other context or screenshots about the feature request here. If your feature request is related to a new Python feature, please include a link to the relevant **official** Python documentation. 37 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | paths: 7 | - grammar.js 8 | - src/** 9 | - test/** 10 | - bindings/** 11 | - binding.gyp 12 | pull_request: 13 | paths: 14 | - grammar.js 15 | - src/** 16 | - test/** 17 | - bindings/** 18 | - binding.gyp 19 | 20 | concurrency: 21 | group: ${{github.workflow}}-${{github.ref}} 22 | cancel-in-progress: true 23 | 24 | jobs: 25 | test: 26 | name: Test parser 27 | runs-on: ${{matrix.os}} 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | os: [ubuntu-latest, windows-latest, macos-14] 32 | steps: 33 | - name: Checkout repository 34 | uses: actions/checkout@v4 35 | - name: Set up tree-sitter 36 | uses: tree-sitter/setup-action/cli@v2 37 | - name: Set up examples 38 | run: |- 39 | git clone https://github.com/numpy/numpy examples/numpy --single-branch --depth=1 --filter=blob:none 40 | git clone https://github.com/django/django examples/django --single-branch --depth=1 --filter=blob:none 41 | git clone https://github.com/pallets/flask examples/flask --single-branch --depth=1 --filter=blob:none 42 | git clone https://github.com/python/cpython examples/cpython --single-branch --depth=1 --filter=blob:none 43 | - name: Run tests 44 | uses: tree-sitter/parser-test-action@v2 45 | with: 46 | test-rust: true 47 | test-node: true 48 | test-python: true 49 | test-go: true 50 | test-swift: true 51 | - name: Parse examples 52 | uses: tree-sitter/parse-action@v4 53 | with: 54 | files: | 55 | examples/**/*.py 
56 | !examples/cpython/Lib/test/test_annotationlib.py 57 | !examples/cpython/Lib/test/test_type_params.py 58 | !examples/cpython/Lib/test/test_compile.py 59 | !examples/cpython/Tools/build/generate_re_casefix.py 60 | !examples/cpython/Lib/test/test_annotationlib.py 61 | !examples/cpython/Lib/test/test_type_params.py 62 | invalid-files: | 63 | examples/cpython/Lib/test/tokenizedata/badsyntax_3131.py 64 | -------------------------------------------------------------------------------- /.github/workflows/fuzz.yml: -------------------------------------------------------------------------------- 1 | name: Fuzz Parser 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | paths: 7 | - src/scanner.c 8 | pull_request: 9 | paths: 10 | - src/scanner.c 11 | 12 | jobs: 13 | fuzz: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v4 18 | - name: Run fuzzer 19 | uses: tree-sitter/fuzz-action@v4 20 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | paths: 7 | - grammar.js 8 | pull_request: 9 | paths: 10 | - grammar.js 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v4 18 | - name: Set up Node.js 19 | uses: actions/setup-node@v4 20 | with: 21 | cache: npm 22 | node-version: ${{vars.NODE_VERSION}} 23 | - name: Install modules 24 | run: npm ci --legacy-peer-deps 25 | - name: Run ESLint 26 | run: npm run lint 27 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish packages 2 | 3 | on: 4 | push: 5 | tags: ["*"] 6 | 7 | permissions: 8 | contents: write 9 | id-token: write 10 | attestations: write 11 | 
12 | jobs: 13 | github: 14 | uses: tree-sitter/workflows/.github/workflows/release.yml@main 15 | with: 16 | generate: true 17 | attestations: true 18 | npm: 19 | uses: tree-sitter/workflows/.github/workflows/package-npm.yml@main 20 | secrets: 21 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} 22 | with: 23 | generate: true 24 | crates: 25 | uses: tree-sitter/workflows/.github/workflows/package-crates.yml@main 26 | secrets: 27 | CARGO_REGISTRY_TOKEN: ${{secrets.CARGO_REGISTRY_TOKEN}} 28 | with: 29 | generate: true 30 | pypi: 31 | uses: tree-sitter/workflows/.github/workflows/package-pypi.yml@main 32 | secrets: 33 | PYPI_API_TOKEN: ${{secrets.PYPI_API_TOKEN}} 34 | with: 35 | generate: true 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust artifacts 2 | target/ 3 | 4 | # Node artifacts 5 | build/ 6 | prebuilds/ 7 | node_modules/ 8 | 9 | # Swift artifacts 10 | .build/ 11 | 12 | # Go artifacts 13 | _obj/ 14 | 15 | # Python artifacts 16 | .venv/ 17 | dist/ 18 | *.egg-info 19 | *.whl 20 | 21 | # C artifacts 22 | *.a 23 | *.so 24 | *.so.* 25 | *.dylib 26 | *.dll 27 | *.pc 28 | 29 | # Example dirs 30 | /examples/*/ 31 | 32 | # Grammar volatiles 33 | *.wasm 34 | *.obj 35 | *.o 36 | 37 | # Archives 38 | *.tar.gz 39 | *.tgz 40 | *.zip 41 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13) 2 | 3 | project(tree-sitter-python 4 | VERSION "0.23.6" 5 | DESCRIPTION "Python grammar for tree-sitter" 6 | HOMEPAGE_URL "https://github.com/tree-sitter/tree-sitter-python" 7 | LANGUAGES C) 8 | 9 | option(BUILD_SHARED_LIBS "Build using shared libraries" ON) 10 | option(TREE_SITTER_REUSE_ALLOCATOR "Reuse the library allocator" OFF) 11 | 12 | set(TREE_SITTER_ABI_VERSION 14 CACHE STRING 
"Tree-sitter ABI version") 13 | if(NOT ${TREE_SITTER_ABI_VERSION} MATCHES "^[0-9]+$") 14 | unset(TREE_SITTER_ABI_VERSION CACHE) 15 | message(FATAL_ERROR "TREE_SITTER_ABI_VERSION must be an integer") 16 | endif() 17 | 18 | find_program(TREE_SITTER_CLI tree-sitter DOC "Tree-sitter CLI") 19 | 20 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c" 21 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json" 22 | COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json 23 | --abi=${TREE_SITTER_ABI_VERSION} 24 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 25 | COMMENT "Generating parser.c") 26 | 27 | add_library(tree-sitter-python src/parser.c) 28 | if(EXISTS src/scanner.c) 29 | target_sources(tree-sitter-python PRIVATE src/scanner.c) 30 | endif() 31 | target_include_directories(tree-sitter-python PRIVATE src) 32 | 33 | target_compile_definitions(tree-sitter-python PRIVATE 34 | $<$<BOOL:${TREE_SITTER_REUSE_ALLOCATOR}>:TREE_SITTER_REUSE_ALLOCATOR> 35 | $<$<CONFIG:Debug>:TREE_SITTER_DEBUG>) 36 | 37 | set_target_properties(tree-sitter-python 38 | PROPERTIES 39 | C_STANDARD 11 40 | POSITION_INDEPENDENT_CODE ON 41 | SOVERSION "${TREE_SITTER_ABI_VERSION}.${PROJECT_VERSION_MAJOR}" 42 | DEFINE_SYMBOL "") 43 | 44 | configure_file(bindings/c/tree-sitter-python.pc.in 45 | "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-python.pc" @ONLY) 46 | 47 | include(GNUInstallDirs) 48 | 49 | install(FILES bindings/c/tree-sitter-python.h 50 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tree_sitter") 51 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-python.pc" 52 | DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig") 53 | install(TARGETS tree-sitter-python 54 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") 55 | 56 | add_custom_target(ts-test "${TREE_SITTER_CLI}" test 57 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 58 | COMMENT "tree-sitter test") 59 | 60 | # vim:ft=cmake: 61 | -------------------------------------------------------------------------------- /Cargo.lock: 
-------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "cc" 16 | version = "1.2.5" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" 19 | dependencies = [ 20 | "shlex", 21 | ] 22 | 23 | [[package]] 24 | name = "memchr" 25 | version = "2.7.4" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 28 | 29 | [[package]] 30 | name = "regex" 31 | version = "1.11.1" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 34 | dependencies = [ 35 | "aho-corasick", 36 | "memchr", 37 | "regex-automata", 38 | "regex-syntax", 39 | ] 40 | 41 | [[package]] 42 | name = "regex-automata" 43 | version = "0.4.9" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 46 | dependencies = [ 47 | "aho-corasick", 48 | "memchr", 49 | "regex-syntax", 50 | ] 51 | 52 | [[package]] 53 | name = "regex-syntax" 54 | version = "0.8.5" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 57 | 58 | [[package]] 59 | name = "shlex" 60 | version = "1.3.0" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = 
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 63 | 64 | [[package]] 65 | name = "streaming-iterator" 66 | version = "0.1.9" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" 69 | 70 | [[package]] 71 | name = "tree-sitter" 72 | version = "0.24.5" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "8ac95b18f0f727aaaa012bd5179a1916706ee3ed071920fdbda738750b0c0bf5" 75 | dependencies = [ 76 | "cc", 77 | "regex", 78 | "regex-syntax", 79 | "streaming-iterator", 80 | "tree-sitter-language", 81 | ] 82 | 83 | [[package]] 84 | name = "tree-sitter-language" 85 | version = "0.1.3" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" 88 | 89 | [[package]] 90 | name = "tree-sitter-python" 91 | version = "0.23.6" 92 | dependencies = [ 93 | "cc", 94 | "tree-sitter", 95 | "tree-sitter-language", 96 | ] 97 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tree-sitter-python" 3 | description = "Python grammar for tree-sitter" 4 | version = "0.23.6" 5 | authors = [ 6 | "Max Brunsfeld ", 7 | "Amaan Qureshi ", 8 | ] 9 | license = "MIT" 10 | readme = "README.md" 11 | keywords = ["incremental", "parsing", "tree-sitter", "python"] 12 | categories = ["parser-implementations", "parsing", "text-editors"] 13 | repository = "https://github.com/tree-sitter/tree-sitter-python" 14 | edition = "2021" 15 | autoexamples = false 16 | 17 | build = "bindings/rust/build.rs" 18 | include = ["LICENSE", "bindings/rust/*", "grammar.js", "queries/*", "src/*", "tree-sitter.json"] 19 | 20 | [lib] 21 | path = "bindings/rust/lib.rs" 22 | 23 | [dependencies] 24 | tree-sitter-language = "0.1" 25 | 26 | 
[build-dependencies] 27 | cc = "1.1" 28 | 29 | [dev-dependencies] 30 | tree-sitter = "0.24" 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Max Brunsfeld 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(OS),Windows_NT) 2 | $(error Windows is not supported) 3 | endif 4 | 5 | LANGUAGE_NAME := tree-sitter-python 6 | HOMEPAGE_URL := https://github.com/tree-sitter/tree-sitter-python 7 | VERSION := 0.23.6 8 | 9 | # repository 10 | SRC_DIR := src 11 | 12 | TS ?= tree-sitter 13 | 14 | # install directory layout 15 | PREFIX ?= /usr/local 16 | INCLUDEDIR ?= $(PREFIX)/include 17 | LIBDIR ?= $(PREFIX)/lib 18 | PCLIBDIR ?= $(LIBDIR)/pkgconfig 19 | 20 | # source/object files 21 | PARSER := $(SRC_DIR)/parser.c 22 | EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c)) 23 | OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS)) 24 | 25 | # flags 26 | ARFLAGS ?= rcs 27 | override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC 28 | 29 | # ABI versioning 30 | SONAME_MAJOR = $(shell sed -n 's/\#define LANGUAGE_VERSION //p' $(PARSER)) 31 | SONAME_MINOR = $(word 1,$(subst ., ,$(VERSION))) 32 | 33 | # OS-specific bits 34 | ifeq ($(shell uname),Darwin) 35 | SOEXT = dylib 36 | SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT) 37 | SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT) 38 | LINKSHARED = -dynamiclib -Wl,-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SOEXTVER),-rpath,@executable_path/../Frameworks 39 | else 40 | SOEXT = so 41 | SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR) 42 | SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR) 43 | LINKSHARED = -shared -Wl,-soname,lib$(LANGUAGE_NAME).$(SOEXTVER) 44 | endif 45 | ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) 46 | PCLIBDIR := $(PREFIX)/libdata/pkgconfig 47 | endif 48 | 49 | all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc 50 | 51 | lib$(LANGUAGE_NAME).a: $(OBJS) 52 | $(AR) $(ARFLAGS) $@ $^ 53 | 54 | lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) 55 | $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ 56 | ifneq ($(STRIP),) 57 | 
$(STRIP) $@ 58 | endif 59 | 60 | $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in 61 | sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \ 62 | -e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \ 63 | -e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \ 64 | -e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \ 65 | -e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \ 66 | -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@ 67 | 68 | $(PARSER): $(SRC_DIR)/grammar.json 69 | $(TS) generate $^ 70 | 71 | install: all 72 | install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' 73 | install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h 74 | install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc 75 | install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a 76 | install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) 77 | ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) 78 | ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) 79 | 80 | uninstall: 81 | $(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ 82 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ 83 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ 84 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ 85 | '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ 86 | '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc 87 | 88 | clean: 89 | $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) 90 | 91 | test: 92 | $(TS) test 93 | 94 | .PHONY: all install uninstall clean test 95 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "object": { 3 | "pins": [ 4 | { 5 | 
"package": "SwiftTreeSitter", 6 | "repositoryURL": "https://github.com/ChimeHQ/SwiftTreeSitter", 7 | "state": { 8 | "branch": null, 9 | "revision": "2599e95310b3159641469d8a21baf2d3d200e61f", 10 | "version": "0.8.0" 11 | } 12 | } 13 | ] 14 | }, 15 | "version": 1 16 | } 17 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.3 2 | import PackageDescription 3 | 4 | let package = Package( 5 | name: "TreeSitterPython", 6 | defaultLocalization: "en", 7 | products: [ 8 | .library(name: "TreeSitterPython", targets: ["TreeSitterPython"]), 9 | ], 10 | dependencies: [ 11 | .package(url: "https://github.com/ChimeHQ/SwiftTreeSitter", from: "0.8.0"), 12 | ], 13 | targets: [ 14 | .target( 15 | name: "TreeSitterPython", 16 | dependencies: [], 17 | path: ".", 18 | sources: [ 19 | "src/parser.c", 20 | "src/scanner.c", 21 | ], 22 | resources: [ 23 | .copy("queries") 24 | ], 25 | publicHeadersPath: "bindings/swift", 26 | cSettings: [.headerSearchPath("src")] 27 | ), 28 | .testTarget( 29 | name: "TreeSitterPythonTests", 30 | dependencies: [ 31 | "SwiftTreeSitter", 32 | "TreeSitterPython", 33 | ], 34 | path: "bindings/swift/TreeSitterPythonTests" 35 | ) 36 | ], 37 | cLanguageStandard: .c11 38 | ) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tree-sitter-python 2 | 3 | [![CI][ci]](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml) 4 | [![discord][discord]](https://discord.gg/w7nTvsVJhm) 5 | [![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org) 6 | [![crates][crates]](https://crates.io/crates/tree-sitter-python) 7 | [![npm][npm]](https://www.npmjs.com/package/tree-sitter-python) 8 | [![pypi][pypi]](https://pypi.org/project/tree-sitter-python/) 9 | 10 | 
Python grammar for [tree-sitter][]. 11 | 12 | [tree-sitter]: https://github.com/tree-sitter/tree-sitter 13 | 14 | ## References 15 | 16 | - [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html) 17 | - [Python 3 Grammar](https://docs.python.org/3/reference/grammar.html) 18 | 19 | [ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/tree-sitter-python/ci.yml?logo=github&label=CI 20 | [discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord 21 | [matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix 22 | [npm]: https://img.shields.io/npm/v/tree-sitter-python?logo=npm 23 | [crates]: https://img.shields.io/crates/v/tree-sitter-python?logo=rust 24 | [pypi]: https://img.shields.io/pypi/v/tree-sitter-python?logo=pypi&logoColor=ffd242 25 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "tree_sitter_python_binding", 5 | "dependencies": [ 6 | " 2 | 3 | typedef struct TSLanguage TSLanguage; 4 | 5 | extern "C" TSLanguage *tree_sitter_python(); 6 | 7 | // "tree-sitter", "language" hashed with BLAKE2 8 | const napi_type_tag LANGUAGE_TYPE_TAG = { 9 | 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 10 | }; 11 | 12 | Napi::Object Init(Napi::Env env, Napi::Object exports) { 13 | exports["name"] = Napi::String::New(env, "python"); 14 | auto language = Napi::External::New(env, tree_sitter_python()); 15 | language.TypeTag(&LANGUAGE_TYPE_TAG); 16 | exports["language"] = language; 17 | return exports; 18 | } 19 | 20 | NODE_API_MODULE(tree_sitter_python_binding, Init) 21 | -------------------------------------------------------------------------------- /bindings/node/binding_test.js: -------------------------------------------------------------------------------- 1 | const assert = require("node:assert"); 2 | const { 
test } = require("node:test"); 3 | 4 | const Parser = require("tree-sitter"); 5 | 6 | test("can load grammar", () => { 7 | const parser = new Parser(); 8 | assert.doesNotThrow(() => parser.setLanguage(require("."))); 9 | }); 10 | -------------------------------------------------------------------------------- /bindings/node/index.d.ts: -------------------------------------------------------------------------------- 1 | type BaseNode = { 2 | type: string; 3 | named: boolean; 4 | }; 5 | 6 | type ChildNode = { 7 | multiple: boolean; 8 | required: boolean; 9 | types: BaseNode[]; 10 | }; 11 | 12 | type NodeInfo = 13 | | (BaseNode & { 14 | subtypes: BaseNode[]; 15 | }) 16 | | (BaseNode & { 17 | fields: { [name: string]: ChildNode }; 18 | children: ChildNode[]; 19 | }); 20 | 21 | type Language = { 22 | name: string; 23 | language: unknown; 24 | nodeTypeInfo: NodeInfo[]; 25 | }; 26 | 27 | declare const language: Language; 28 | export = language; 29 | -------------------------------------------------------------------------------- /bindings/node/index.js: -------------------------------------------------------------------------------- 1 | const root = require("path").join(__dirname, "..", ".."); 2 | 3 | module.exports = 4 | typeof process.versions.bun === "string" 5 | // Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time 6 | ? 
require(`../../prebuilds/${process.platform}-${process.arch}/tree-sitter-python.node`) 7 | : require("node-gyp-build")(root); 8 | 9 | try { 10 | module.exports.nodeTypeInfo = require("../../src/node-types.json"); 11 | } catch (_) {} 12 | -------------------------------------------------------------------------------- /bindings/python/tests/test_binding.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import tree_sitter, tree_sitter_python 4 | 5 | 6 | class TestLanguage(TestCase): 7 | def test_can_load_grammar(self): 8 | try: 9 | tree_sitter.Language(tree_sitter_python.language()) 10 | except Exception: 11 | self.fail("Error loading Python grammar") 12 | -------------------------------------------------------------------------------- /bindings/python/tree_sitter_python/__init__.py: -------------------------------------------------------------------------------- 1 | """Python grammar for tree-sitter""" 2 | 3 | from importlib.resources import files as _files 4 | 5 | from ._binding import language 6 | 7 | 8 | def _get_query(name, file): 9 | query = _files(f"{__package__}.queries") / file 10 | globals()[name] = query.read_text() 11 | return globals()[name] 12 | 13 | 14 | def __getattr__(name): 15 | if name == "HIGHLIGHTS_QUERY": 16 | return _get_query("HIGHLIGHTS_QUERY", "highlights.scm") 17 | if name == "TAGS_QUERY": 18 | return _get_query("TAGS_QUERY", "tags.scm") 19 | 20 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 21 | 22 | 23 | __all__ = [ 24 | "language", 25 | "HIGHLIGHTS_QUERY", 26 | "TAGS_QUERY", 27 | ] 28 | 29 | 30 | def __dir__(): 31 | return sorted(__all__ + [ 32 | "__all__", "__builtins__", "__cached__", "__doc__", "__file__", 33 | "__loader__", "__name__", "__package__", "__path__", "__spec__", 34 | ]) 35 | -------------------------------------------------------------------------------- /bindings/python/tree_sitter_python/__init__.pyi: 
-------------------------------------------------------------------------------- 1 | from typing import Final 2 | 3 | HIGHLIGHTS_QUERY: Final[str] 4 | TAGS_QUERY: Final[str] 5 | 6 | def language() -> object: ... 7 | -------------------------------------------------------------------------------- /bindings/python/tree_sitter_python/binding.c: -------------------------------------------------------------------------------- 1 | #include <Python.h> 2 | 3 | typedef struct TSLanguage TSLanguage; 4 | 5 | TSLanguage *tree_sitter_python(void); 6 | 7 | static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { 8 | return PyCapsule_New(tree_sitter_python(), "tree_sitter.Language", NULL); 9 | } 10 | 11 | static PyMethodDef methods[] = { 12 | {"language", _binding_language, METH_NOARGS, 13 | "Get the tree-sitter language for this grammar."}, 14 | {NULL, NULL, 0, NULL} 15 | }; 16 | 17 | static struct PyModuleDef module = { 18 | .m_base = PyModuleDef_HEAD_INIT, 19 | .m_name = "_binding", 20 | .m_doc = NULL, 21 | .m_size = -1, 22 | .m_methods = methods 23 | }; 24 | 25 | PyMODINIT_FUNC PyInit__binding(void) { 26 | return PyModule_Create(&module); 27 | } 28 | -------------------------------------------------------------------------------- /bindings/python/tree_sitter_python/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tree-sitter/tree-sitter-python/710796b8b877a970297106e5bbc8e2afa47f86ec/bindings/python/tree_sitter_python/py.typed -------------------------------------------------------------------------------- /bindings/rust/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let src_dir = std::path::Path::new("src"); 3 | 4 | let mut c_config = cc::Build::new(); 5 | c_config 6 | .std("c11") 7 | .include(src_dir) 8 | .flag_if_supported("-Wno-unused-value"); 9 | 10 | #[cfg(target_env = "msvc")] 11 | c_config.flag("-utf-8"); 12
| 13 | let parser_path = src_dir.join("parser.c"); 14 | c_config.file(&parser_path); 15 | println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); 16 | 17 | let scanner_path = src_dir.join("scanner.c"); 18 | c_config.file(&scanner_path); 19 | println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); 20 | 21 | c_config.compile("tree-sitter-python"); 22 | } 23 | -------------------------------------------------------------------------------- /bindings/rust/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides Python language support for the [tree-sitter][] parsing library. 2 | //! 3 | //! Typically, you will use the [LANGUAGE][] constant to add this language to a 4 | //! tree-sitter [Parser][], and then use the parser to parse some code: 5 | //! 6 | //! ``` 7 | //! use tree_sitter::Parser; 8 | //! 9 | //! let code = r#" 10 | //! def double(x): 11 | //! return x * 2 12 | //! "#; 13 | //! let mut parser = Parser::new(); 14 | //! let language = tree_sitter_python::LANGUAGE; 15 | //! parser 16 | //! .set_language(&language.into()) 17 | //! .expect("Error loading Python parser"); 18 | //! let tree = parser.parse(code, None).unwrap(); 19 | //! assert!(!tree.root_node().has_error()); 20 | //! ``` 21 | //! 22 | //! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html 23 | //! [tree-sitter]: https://tree-sitter.github.io/ 24 | 25 | use tree_sitter_language::LanguageFn; 26 | 27 | extern "C" { 28 | fn tree_sitter_python() -> *const (); 29 | } 30 | 31 | /// The tree-sitter [`LanguageFn`][LanguageFn] for this grammar. 32 | /// 33 | /// [LanguageFn]: https://docs.rs/tree-sitter-language/*/tree_sitter_language/struct.LanguageFn.html 34 | pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_python) }; 35 | 36 | /// The content of the [`node-types.json`][] file for this grammar. 
37 | /// 38 | /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types 39 | pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); 40 | 41 | /// The syntax highlighting query for this language. 42 | pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); 43 | 44 | /// The symbol tagging query for this language. 45 | pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); 46 | 47 | #[cfg(test)] 48 | mod tests { 49 | #[test] 50 | fn test_can_load_grammar() { 51 | let mut parser = tree_sitter::Parser::new(); 52 | parser 53 | .set_language(&super::LANGUAGE.into()) 54 | .expect("Error loading Python parser"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /bindings/swift/TreeSitterPython/python.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_PYTHON_H_ 2 | #define TREE_SITTER_PYTHON_H_ 3 | 4 | typedef struct TSLanguage TSLanguage; 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | const TSLanguage *tree_sitter_python(void); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | #endif // TREE_SITTER_PYTHON_H_ 17 | -------------------------------------------------------------------------------- /bindings/swift/TreeSitterPythonTests/TreeSitterPythonTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | import SwiftTreeSitter 3 | import TreeSitterPython 4 | 5 | final class TreeSitterPythonTests: XCTestCase { 6 | func testCanLoadGrammar() throws { 7 | let parser = Parser() 8 | let language = Language(language: tree_sitter_python()) 9 | XCTAssertNoThrow(try parser.setLanguage(language), 10 | "Error loading Python grammar") 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /eslint.config.mjs: 
-------------------------------------------------------------------------------- 1 | import treesitter from 'eslint-config-treesitter'; 2 | 3 | export default [ 4 | ...treesitter, 5 | ]; 6 | -------------------------------------------------------------------------------- /examples/compound-statement-without-trailing-newline.py: -------------------------------------------------------------------------------- 1 | class Foo: 2 | def bar(): 3 | print "hi" -------------------------------------------------------------------------------- /examples/crlf-line-endings.py: -------------------------------------------------------------------------------- 1 | print a 2 | 3 | if b: 4 | if c: 5 | d 6 | e 7 | -------------------------------------------------------------------------------- /examples/mixed-spaces-tabs.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | print "hello" 3 | # 1 tab = 8 spaces in Python 2 4 | return 5 | -------------------------------------------------------------------------------- /examples/multiple-newlines.py: -------------------------------------------------------------------------------- 1 | def hi(): 2 | 3 | 4 | 5 | print "hi" 6 | 7 | 8 | def bye(): 9 | print "bye" 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/python2-grammar.py: -------------------------------------------------------------------------------- 1 | # Python test set -- part 1, grammar. 2 | # This just tests whether the parser accepts them all. 3 | 4 | # NOTE: When you run this test as a script from the command line, you 5 | # get warnings about certain hex/oct constants. Since those are 6 | # issued by the parser, you can't suppress them by adding a 7 | # filterwarnings() call to this module. Therefore, to shut up the 8 | # regression test, the filterwarnings() call has been added to 9 | # regrtest.py. 
10 | 11 | from test.test_support import run_unittest, check_syntax_error 12 | import unittest 13 | import sys 14 | # testing import * 15 | from sys import * 16 | 17 | class TokenTests(unittest.TestCase): 18 | 19 | def testBackslash(self): 20 | # Backslash means line continuation: 21 | x = 1 \ 22 | + 1 23 | self.assertEquals(x, 2, 'backslash for line continuation') 24 | 25 | # Backslash does not means continuation in comments :\ 26 | x = 0 27 | self.assertEquals(x, 0, 'backslash ending comment') 28 | 29 | def testPlainIntegers(self): 30 | self.assertEquals(0xff, 255) 31 | self.assertEquals(0377, 255) 32 | self.assertEquals(2147483647, 017777777777) 33 | # "0x" is not a valid literal 34 | self.assertRaises(SyntaxError, eval, "0x") 35 | from sys import maxint 36 | if maxint == 2147483647: 37 | self.assertEquals(-2147483647-1, -020000000000) 38 | # XXX -2147483648 39 | self.assert_(037777777777 > 0) 40 | self.assert_(0xffffffff > 0) 41 | for s in '2147483648', '040000000000', '0x100000000': 42 | try: 43 | x = eval(s) 44 | except OverflowError: 45 | self.fail("OverflowError on huge integer literal %r" % s) 46 | elif maxint == 9223372036854775807: 47 | self.assertEquals(-9223372036854775807-1, -01000000000000000000000) 48 | self.assert_(01777777777777777777777 > 0) 49 | self.assert_(0xffffffffffffffff > 0) 50 | for s in '9223372036854775808', '02000000000000000000000', \ 51 | '0x10000000000000000': 52 | try: 53 | x = eval(s) 54 | except OverflowError: 55 | self.fail("OverflowError on huge integer literal %r" % s) 56 | else: 57 | self.fail('Weird maxint value %r' % maxint) 58 | 59 | def testLongIntegers(self): 60 | x = 0L 61 | x = 0l 62 | x = 0xffffffffffffffffL 63 | x = 0xffffffffffffffffl 64 | x = 077777777777777777L 65 | x = 077777777777777777l 66 | x = 123456789012345678901234567890L 67 | x = 123456789012345678901234567890l 68 | 69 | def testFloats(self): 70 | x = 3.14 71 | x = 314. 
72 | x = 0.314 73 | # XXX x = 000.314 74 | x = .314 75 | x = 3e14 76 | x = 3E14 77 | x = 3e-14 78 | x = 3e+14 79 | x = 3.e14 80 | x = .3e14 81 | x = 3.1e4 82 | 83 | class GrammarTests(unittest.TestCase): 84 | 85 | # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 86 | # XXX can't test in a script -- this rule is only used when interactive 87 | 88 | # file_input: (NEWLINE | stmt)* ENDMARKER 89 | # Being tested as this very moment this very module 90 | 91 | # expr_input: testlist NEWLINE 92 | # XXX Hard to test -- used only in calls to input() 93 | 94 | def testEvalInput(self): 95 | # testlist ENDMARKER 96 | x = eval('1, 0 or 1') 97 | 98 | def testFuncdef(self): 99 | ### 'def' NAME parameters ':' suite 100 | ### parameters: '(' [varargslist] ')' 101 | ### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME] 102 | ### | ('**'|'*' '*') NAME) 103 | ### | fpdef ['=' test] (',' fpdef ['=' test])* [','] 104 | ### fpdef: NAME | '(' fplist ')' 105 | ### fplist: fpdef (',' fpdef)* [','] 106 | ### arglist: (argument ',')* (argument | *' test [',' '**' test] | '**' test) 107 | ### argument: [test '='] test # Really [keyword '='] test 108 | def f1(): pass 109 | f1() 110 | f1(*()) 111 | f1(*(), **{}) 112 | def f2(one_argument): pass 113 | def f3(two, arguments): pass 114 | def f4(two, (compound, (argument, list))): pass 115 | def f5((compound, first), two): pass 116 | self.assertEquals(f2.func_code.co_varnames, ('one_argument',)) 117 | self.assertEquals(f3.func_code.co_varnames, ('two', 'arguments')) 118 | if sys.platform.startswith('java'): 119 | self.assertEquals(f4.func_code.co_varnames, 120 | ('two', '(compound, (argument, list))', 'compound', 'argument', 121 | 'list',)) 122 | self.assertEquals(f5.func_code.co_varnames, 123 | ('(compound, first)', 'two', 'compound', 'first')) 124 | else: 125 | self.assertEquals(f4.func_code.co_varnames, 126 | ('two', '.1', 'compound', 'argument', 'list')) 127 | self.assertEquals(f5.func_code.co_varnames, 128 | 
('.0', 'two', 'compound', 'first')) 129 | def a1(one_arg,): pass 130 | def a2(two, args,): pass 131 | def v0(*rest): pass 132 | def v1(a, *rest): pass 133 | def v2(a, b, *rest): pass 134 | def v3(a, (b, c), *rest): return a, b, c, rest 135 | 136 | f1() 137 | f2(1) 138 | f2(1,) 139 | f3(1, 2) 140 | f3(1, 2,) 141 | f4(1, (2, (3, 4))) 142 | v0() 143 | v0(1) 144 | v0(1,) 145 | v0(1,2) 146 | v0(1,2,3,4,5,6,7,8,9,0) 147 | v1(1) 148 | v1(1,) 149 | v1(1,2) 150 | v1(1,2,3) 151 | v1(1,2,3,4,5,6,7,8,9,0) 152 | v2(1,2) 153 | v2(1,2,3) 154 | v2(1,2,3,4) 155 | v2(1,2,3,4,5,6,7,8,9,0) 156 | v3(1,(2,3)) 157 | v3(1,(2,3),4) 158 | v3(1,(2,3),4,5,6,7,8,9,0) 159 | 160 | # ceval unpacks the formal arguments into the first argcount names; 161 | # thus, the names nested inside tuples must appear after these names. 162 | if sys.platform.startswith('java'): 163 | self.assertEquals(v3.func_code.co_varnames, ('a', '(b, c)', 'rest', 'b', 'c')) 164 | else: 165 | self.assertEquals(v3.func_code.co_varnames, ('a', '.1', 'rest', 'b', 'c')) 166 | self.assertEquals(v3(1, (2, 3), 4), (1, 2, 3, (4,))) 167 | def d01(a=1): pass 168 | d01() 169 | d01(1) 170 | d01(*(1,)) 171 | d01(**{'a':2}) 172 | def d11(a, b=1): pass 173 | d11(1) 174 | d11(1, 2) 175 | d11(1, **{'b':2}) 176 | def d21(a, b, c=1): pass 177 | d21(1, 2) 178 | d21(1, 2, 3) 179 | d21(*(1, 2, 3)) 180 | d21(1, *(2, 3)) 181 | d21(1, 2, *(3,)) 182 | d21(1, 2, **{'c':3}) 183 | def d02(a=1, b=2): pass 184 | d02() 185 | d02(1) 186 | d02(1, 2) 187 | d02(*(1, 2)) 188 | d02(1, *(2,)) 189 | d02(1, **{'b':2}) 190 | d02(**{'a': 1, 'b': 2}) 191 | def d12(a, b=1, c=2): pass 192 | d12(1) 193 | d12(1, 2) 194 | d12(1, 2, 3) 195 | def d22(a, b, c=1, d=2): pass 196 | d22(1, 2) 197 | d22(1, 2, 3) 198 | d22(1, 2, 3, 4) 199 | def d01v(a=1, *rest): pass 200 | d01v() 201 | d01v(1) 202 | d01v(1, 2) 203 | d01v(*(1, 2, 3, 4)) 204 | d01v(*(1,)) 205 | d01v(**{'a':2}) 206 | def d11v(a, b=1, *rest): pass 207 | d11v(1) 208 | d11v(1, 2) 209 | d11v(1, 2, 3) 210 | def d21v(a, b, 
c=1, *rest): pass 211 | d21v(1, 2) 212 | d21v(1, 2, 3) 213 | d21v(1, 2, 3, 4) 214 | d21v(*(1, 2, 3, 4)) 215 | d21v(1, 2, **{'c': 3}) 216 | def d02v(a=1, b=2, *rest): pass 217 | d02v() 218 | d02v(1) 219 | d02v(1, 2) 220 | d02v(1, 2, 3) 221 | d02v(1, *(2, 3, 4)) 222 | d02v(**{'a': 1, 'b': 2}) 223 | def d12v(a, b=1, c=2, *rest): pass 224 | d12v(1) 225 | d12v(1, 2) 226 | d12v(1, 2, 3) 227 | d12v(1, 2, 3, 4) 228 | d12v(*(1, 2, 3, 4)) 229 | d12v(1, 2, *(3, 4, 5)) 230 | d12v(1, *(2,), **{'c': 3}) 231 | def d22v(a, b, c=1, d=2, *rest): pass 232 | d22v(1, 2) 233 | d22v(1, 2, 3) 234 | d22v(1, 2, 3, 4) 235 | d22v(1, 2, 3, 4, 5) 236 | d22v(*(1, 2, 3, 4)) 237 | d22v(1, 2, *(3, 4, 5)) 238 | d22v(1, *(2, 3), **{'d': 4}) 239 | def d31v((x)): pass 240 | d31v(1) 241 | def d32v((x,)): pass 242 | d32v((1,)) 243 | 244 | # keyword arguments after *arglist 245 | def f(*args, **kwargs): 246 | return args, kwargs 247 | self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), 248 | {'x':2, 'y':5})) 249 | self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") 250 | self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") 251 | 252 | # Check ast errors in *args and *kwargs 253 | check_syntax_error(self, "f(*g(1=2))") 254 | check_syntax_error(self, "f(**g(1=2))") 255 | 256 | def testLambdef(self): 257 | ### lambdef: 'lambda' [varargslist] ':' test 258 | l1 = lambda : 0 259 | self.assertEquals(l1(), 0) 260 | l2 = lambda : a[d] # XXX just testing the expression 261 | l3 = lambda : [2 < x for x in [-1, 3, 0L]] 262 | self.assertEquals(l3(), [0, 1, 0]) 263 | l4 = lambda x = lambda y = lambda z=1 : z : y() : x() 264 | self.assertEquals(l4(), 1) 265 | l5 = lambda x, y, z=2: x + y + z 266 | self.assertEquals(l5(1, 2), 5) 267 | self.assertEquals(l5(1, 2, 3), 6) 268 | check_syntax_error(self, "lambda x: x = 2") 269 | check_syntax_error(self, "lambda (None,): None") 270 | 271 | ### stmt: simple_stmt | compound_stmt 272 | # Tested below 273 | 274 | def testSimpleStmt(self): 275 | ### simple_stmt: 
small_stmt (';' small_stmt)* [';'] 276 | x = 1; pass; del x 277 | def foo(): 278 | # verify statements that end with semi-colons 279 | x = 1; pass; del x; 280 | foo() 281 | 282 | ### small_stmt: expr_stmt | print_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt | exec_stmt 283 | # Tested below 284 | 285 | def testExprStmt(self): 286 | # (exprlist '=')* exprlist 287 | 1 288 | 1, 2, 3 289 | x = 1 290 | x = 1, 2, 3 291 | x = y = z = 1, 2, 3 292 | x, y, z = 1, 2, 3 293 | abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) 294 | 295 | check_syntax_error(self, "x + 1 = 1") 296 | check_syntax_error(self, "a + 1 = b + 2") 297 | 298 | def testPrintStmt(self): 299 | # 'print' (test ',')* [test] 300 | import StringIO 301 | 302 | # Can't test printing to real stdout without comparing output 303 | # which is not available in unittest. 304 | save_stdout = sys.stdout 305 | sys.stdout = StringIO.StringIO() 306 | 307 | print 1, 2, 3 308 | print 1, 2, 3, 309 | print 310 | print 0 or 1, 0 or 1, 311 | print 0 or 1 312 | 313 | # 'print' '>>' test ',' 314 | print >> sys.stdout, 1, 2, 3 315 | print >> sys.stdout, 1, 2, 3, 316 | print >> sys.stdout 317 | print >> sys.stdout, 0 or 1, 0 or 1, 318 | print >> sys.stdout, 0 or 1 319 | 320 | # test printing to an instance 321 | class Gulp: 322 | def write(self, msg): pass 323 | 324 | gulp = Gulp() 325 | print >> gulp, 1, 2, 3 326 | print >> gulp, 1, 2, 3, 327 | print >> gulp 328 | print >> gulp, 0 or 1, 0 or 1, 329 | print >> gulp, 0 or 1 330 | 331 | # test print >> None 332 | def driver(): 333 | oldstdout = sys.stdout 334 | sys.stdout = Gulp() 335 | try: 336 | tellme(Gulp()) 337 | tellme() 338 | finally: 339 | sys.stdout = oldstdout 340 | 341 | # we should see this once 342 | def tellme(file=sys.stdout): 343 | print >> file, 'hello world' 344 | 345 | driver() 346 | 347 | # we should not see this at all 348 | def tellme(file=None): 349 | print >> file, 'goodbye universe' 350 | 351 | driver() 352 | 353 | 
self.assertEqual(sys.stdout.getvalue(), '''\ 354 | 1 2 3 355 | 1 2 3 356 | 1 1 1 357 | 1 2 3 358 | 1 2 3 359 | 1 1 1 360 | hello world 361 | ''') 362 | sys.stdout = save_stdout 363 | 364 | # syntax errors 365 | check_syntax_error(self, 'print ,') 366 | check_syntax_error(self, 'print >> x,') 367 | 368 | def testDelStmt(self): 369 | # 'del' exprlist 370 | abc = [1,2,3] 371 | x, y, z = abc 372 | xyz = x, y, z 373 | 374 | del abc 375 | del x, y, (z, xyz) 376 | 377 | def testPassStmt(self): 378 | # 'pass' 379 | pass 380 | 381 | # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt 382 | # Tested below 383 | 384 | def testBreakStmt(self): 385 | # 'break' 386 | while 1: break 387 | 388 | def testContinueStmt(self): 389 | # 'continue' 390 | i = 1 391 | while i: i = 0; continue 392 | 393 | msg = "" 394 | while not msg: 395 | msg = "ok" 396 | try: 397 | continue 398 | msg = "continue failed to continue inside try" 399 | except: 400 | msg = "continue inside try called except block" 401 | if msg != "ok": 402 | self.fail(msg) 403 | 404 | msg = "" 405 | while not msg: 406 | msg = "finally block not called" 407 | try: 408 | continue 409 | finally: 410 | msg = "ok" 411 | if msg != "ok": 412 | self.fail(msg) 413 | 414 | def test_break_continue_loop(self): 415 | # This test warrants an explanation. It is a test specifically for SF bugs 416 | # #463359 and #462937. The bug is that a 'break' statement executed or 417 | # exception raised inside a try/except inside a loop, *after* a continue 418 | # statement has been executed in that loop, will cause the wrong number of 419 | # arguments to be popped off the stack and the instruction pointer reset to 420 | # a very small number (usually 0.) Because of this, the following test 421 | # *must* written as a function, and the tracking vars *must* be function 422 | # arguments with default values. Otherwise, the test will loop and loop. 
423 | 424 | def test_inner(extra_burning_oil = 1, count=0): 425 | big_hippo = 2 426 | while big_hippo: 427 | count += 1 428 | try: 429 | if extra_burning_oil and big_hippo == 1: 430 | extra_burning_oil -= 1 431 | break 432 | big_hippo -= 1 433 | continue 434 | except: 435 | raise 436 | if count > 2 or big_hippo <> 1: 437 | self.fail("continue then break in try/except in loop broken!") 438 | test_inner() 439 | 440 | def testReturn(self): 441 | # 'return' [testlist] 442 | def g1(): return 443 | def g2(): return 1 444 | g1() 445 | x = g2() 446 | check_syntax_error(self, "class foo:return 1") 447 | 448 | def testYield(self): 449 | check_syntax_error(self, "class foo:yield 1") 450 | 451 | def testRaise(self): 452 | # 'raise' test [',' test] 453 | try: raise RuntimeError, 'just testing' 454 | except RuntimeError: pass 455 | try: raise KeyboardInterrupt 456 | except KeyboardInterrupt: pass 457 | 458 | def testImport(self): 459 | # 'import' dotted_as_names 460 | import sys 461 | import time, sys 462 | # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) 463 | from time import time 464 | from time import (time) 465 | # not testable inside a function, but already done at top of the module 466 | # from sys import * 467 | from sys import path, argv 468 | from sys import (path, argv) 469 | from sys import (path, argv,) 470 | 471 | def testGlobal(self): 472 | # 'global' NAME (',' NAME)* 473 | global a 474 | global a, b 475 | global one, two, three, four, five, six, seven, eight, nine, ten 476 | 477 | def testExec(self): 478 | # 'exec' expr ['in' expr [',' expr]] 479 | z = None 480 | del z 481 | exec 'z=1+1\n' 482 | if z != 2: self.fail('exec \'z=1+1\'\\n') 483 | del z 484 | exec 'z=1+1' 485 | if z != 2: self.fail('exec \'z=1+1\'') 486 | z = None 487 | del z 488 | import types 489 | if hasattr(types, "UnicodeType"): 490 | exec r"""if 1: 491 | exec u'z=1+1\n' 492 | if z != 2: self.fail('exec u\'z=1+1\'\\n') 493 | del z 494 | exec u'z=1+1' 495 | if z != 
2: self.fail('exec u\'z=1+1\'')""" 496 | g = {} 497 | exec 'z = 1' in g 498 | if g.has_key('__builtins__'): del g['__builtins__'] 499 | if g != {'z': 1}: self.fail('exec \'z = 1\' in g') 500 | g = {} 501 | l = {} 502 | 503 | import warnings 504 | warnings.filterwarnings("ignore", "global statement", module="<string>") 505 | exec 'global a; a = 1; b = 2' in g, l 506 | if g.has_key('__builtins__'): del g['__builtins__'] 507 | if l.has_key('__builtins__'): del l['__builtins__'] 508 | if (g, l) != ({'a':1}, {'b':2}): 509 | self.fail('exec ... in g (%s), l (%s)' %(g,l)) 510 | 511 | def testAssert(self): 512 | # assert_stmt: 'assert' test [',' test] 513 | assert 1 514 | assert 1, 1 515 | assert lambda x:x 516 | assert 1, lambda x:x+1 517 | try: 518 | assert 0, "msg" 519 | except AssertionError, e: 520 | self.assertEquals(e.args[0], "msg") 521 | else: 522 | if __debug__: 523 | self.fail("AssertionError not raised by assert 0") 524 | 525 | ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef 526 | # Tested below 527 | 528 | def testIf(self): 529 | # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 530 | if 1: pass 531 | if 1: pass 532 | else: pass 533 | if 0: pass 534 | elif 0: pass 535 | if 0: pass 536 | elif 0: pass 537 | elif 0: pass 538 | elif 0: pass 539 | else: pass 540 | 541 | def testWhile(self): 542 | # 'while' test ':' suite ['else' ':' suite] 543 | while 0: pass 544 | while 0: pass 545 | else: pass 546 | 547 | # Issue1920: "while 0" is optimized away, 548 | # ensure that the "else" clause is still present.
549 | x = 0 550 | while 0: 551 | x = 1 552 | else: 553 | x = 2 554 | self.assertEquals(x, 2) 555 | 556 | def testFor(self): 557 | # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] 558 | for i in 1, 2, 3: pass 559 | for i, j, k in (): pass 560 | else: pass 561 | class Squares: 562 | def __init__(self, max): 563 | self.max = max 564 | self.sofar = [] 565 | def __len__(self): return len(self.sofar) 566 | def __getitem__(self, i): 567 | if not 0 <= i < self.max: raise IndexError 568 | n = len(self.sofar) 569 | while n <= i: 570 | self.sofar.append(n*n) 571 | n = n+1 572 | return self.sofar[i] 573 | n = 0 574 | for x in Squares(10): n = n+x 575 | if n != 285: 576 | self.fail('for over growing sequence') 577 | 578 | result = [] 579 | for x, in [(1,), (2,), (3,)]: 580 | result.append(x) 581 | self.assertEqual(result, [1, 2, 3]) 582 | 583 | def testTry(self): 584 | ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] 585 | ### | 'try' ':' suite 'finally' ':' suite 586 | ### except_clause: 'except' [expr [('as' | ',') expr]] 587 | try: 588 | 1/0 589 | except ZeroDivisionError: 590 | pass 591 | else: 592 | pass 593 | try: 1/0 594 | except EOFError: pass 595 | except TypeError as msg: pass 596 | except RuntimeError, msg: pass 597 | except: pass 598 | else: pass 599 | try: 1/0 600 | except (EOFError, TypeError, ZeroDivisionError): pass 601 | try: 1/0 602 | except (EOFError, TypeError, ZeroDivisionError), msg: pass 603 | try: pass 604 | finally: pass 605 | 606 | def testSuite(self): 607 | # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT 608 | if 1: pass 609 | if 1: 610 | pass 611 | if 1: 612 | # 613 | # 614 | # 615 | pass 616 | pass 617 | # 618 | pass 619 | # 620 | 621 | def testTest(self): 622 | ### and_test ('or' and_test)* 623 | ### and_test: not_test ('and' not_test)* 624 | ### not_test: 'not' not_test | comparison 625 | if not 1: pass 626 | if 1 and 1: pass 627 | if 1 or 1: pass 628 | if not not not 1: pass 629 | if not 1 and 
1 and 1: pass 630 | if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass 631 | 632 | def testComparison(self): 633 | ### comparison: expr (comp_op expr)* 634 | ### comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 635 | if 1: pass 636 | x = (1 == 1) 637 | if 1 == 1: pass 638 | if 1 != 1: pass 639 | if 1 <> 1: pass 640 | if 1 < 1: pass 641 | if 1 > 1: pass 642 | if 1 <= 1: pass 643 | if 1 >= 1: pass 644 | if 1 is 1: pass 645 | if 1 is not 1: pass 646 | if 1 in (): pass 647 | if 1 not in (): pass 648 | if 1 < 1 > 1 == 1 >= 1 <= 1 <> 1 != 1 in 1 not in 1 is 1 is not 1: pass 649 | 650 | def testBinaryMaskOps(self): 651 | x = 1 & 1 652 | x = 1 ^ 1 653 | x = 1 | 1 654 | 655 | def testShiftOps(self): 656 | x = 1 << 1 657 | x = 1 >> 1 658 | x = 1 << 1 >> 1 659 | 660 | def testAdditiveOps(self): 661 | x = 1 662 | x = 1 + 1 663 | x = 1 - 1 - 1 664 | x = 1 - 1 + 1 - 1 + 1 665 | 666 | def testMultiplicativeOps(self): 667 | x = 1 * 1 668 | x = 1 / 1 669 | x = 1 % 1 670 | x = 1 / 1 * 1 % 1 671 | 672 | def testUnaryOps(self): 673 | x = +1 674 | x = -1 675 | x = ~1 676 | x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 677 | x = -1*1/1 + 1*1 - ---1*1 678 | 679 | def testSelectors(self): 680 | ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' NAME 681 | ### subscript: expr | [expr] ':' [expr] 682 | 683 | import sys, time 684 | c = sys.path[0] 685 | x = time.time() 686 | x = sys.modules['time'].time() 687 | a = '01234' 688 | c = a[0] 689 | c = a[-1] 690 | s = a[0:5] 691 | s = a[:5] 692 | s = a[0:] 693 | s = a[:] 694 | s = a[-5:] 695 | s = a[:-1] 696 | s = a[-4:-3] 697 | # A rough test of SF bug 1333982. http://python.org/sf/1333982 698 | # The testing here is fairly incomplete. 
699 | # Test cases should include: commas with 1 and 2 colons 700 | d = {} 701 | d[1] = 1 702 | d[1,] = 2 703 | d[1,2] = 3 704 | d[1,2,3] = 4 705 | L = list(d) 706 | L.sort() 707 | self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') 708 | 709 | def testAtoms(self): 710 | ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING 711 | ### dictmaker: test ':' test (',' test ':' test)* [','] 712 | 713 | x = (1) 714 | x = (1 or 2 or 3) 715 | x = (1 or 2 or 3, 2, 3) 716 | 717 | x = [] 718 | x = [1] 719 | x = [1 or 2 or 3] 720 | x = [1 or 2 or 3, 2, 3] 721 | x = [] 722 | 723 | x = {} 724 | x = {'one': 1} 725 | x = {'one': 1,} 726 | x = {'one' or 'two': 1 or 2} 727 | x = {'one': 1, 'two': 2} 728 | x = {'one': 1, 'two': 2,} 729 | x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} 730 | 731 | x = `x` 732 | x = `1 or 2 or 3` 733 | self.assertEqual(`1,2`, '(1, 2)') 734 | 735 | x = x 736 | x = 'x' 737 | x = 123 738 | 739 | ### exprlist: expr (',' expr)* [','] 740 | ### testlist: test (',' test)* [','] 741 | # These have been exercised enough above 742 | 743 | def testClassdef(self): 744 | # 'class' NAME ['(' [testlist] ')'] ':' suite 745 | class B: pass 746 | class B2(): pass 747 | class C1(B): pass 748 | class C2(B): pass 749 | class D(C1, C2, B): pass 750 | class C: 751 | def meth1(self): pass 752 | def meth2(self, arg): pass 753 | def meth3(self, a1, a2): pass 754 | # decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 755 | # decorators: decorator+ 756 | # decorated: decorators (classdef | funcdef) 757 | def class_decorator(x): 758 | x.decorated = True 759 | return x 760 | @class_decorator 761 | class G: 762 | pass 763 | self.assertEqual(G.decorated, True) 764 | 765 | def testListcomps(self): 766 | # list comprehension tests 767 | nums = [1, 2, 3, 4, 5] 768 | strs = ["Apple", "Banana", "Coconut"] 769 | spcs = [" Apple", " Banana ", "Coco nut "] 770 | 771 | self.assertEqual([s.strip() for 
s in spcs], ['Apple', 'Banana', 'Coco nut']) 772 | self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) 773 | self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) 774 | self.assertEqual([(i, s) for i in nums for s in strs], 775 | [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), 776 | (2, 'Apple'), (2, 'Banana'), (2, 'Coconut'), 777 | (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), 778 | (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), 779 | (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) 780 | self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], 781 | [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), 782 | (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), 783 | (5, 'Banana'), (5, 'Coconut')]) 784 | self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], 785 | [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) 786 | 787 | def test_in_func(l): 788 | return [None < x < 3 for x in l if x > 2] 789 | 790 | self.assertEqual(test_in_func(nums), [False, False, False]) 791 | 792 | def test_nested_front(): 793 | self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], 794 | [[1, 2], [3, 4], [5, 6]]) 795 | 796 | test_nested_front() 797 | 798 | check_syntax_error(self, "[i, s for i in nums for s in strs]") 799 | check_syntax_error(self, "[x if y]") 800 | 801 | suppliers = [ 802 | (1, "Boeing"), 803 | (2, "Ford"), 804 | (3, "Macdonalds") 805 | ] 806 | 807 | parts = [ 808 | (10, "Airliner"), 809 | (20, "Engine"), 810 | (30, "Cheeseburger") 811 | ] 812 | 813 | suppart = [ 814 | (1, 10), (1, 20), (2, 20), (3, 30) 815 | ] 816 | 817 | x = [ 818 | (sname, pname) 819 | for (sno, sname) in suppliers 820 | for (pno, pname) in parts 821 | for (sp_sno, sp_pno) in suppart 822 | if sno == sp_sno and pno == sp_pno 823 | ] 824 | 825 | self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), 826 | ('Macdonalds', 'Cheeseburger')]) 827 | 828 | def testGenexps(self): 829 | # 
generator expression tests 830 | g = ([x for x in range(10)] for x in range(1)) 831 | self.assertEqual(g.next(), [x for x in range(10)]) 832 | try: 833 | g.next() 834 | self.fail('should produce StopIteration exception') 835 | except StopIteration: 836 | pass 837 | 838 | a = 1 839 | try: 840 | g = (a for d in a) 841 | g.next() 842 | self.fail('should produce TypeError') 843 | except TypeError: 844 | pass 845 | 846 | self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) 847 | self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) for x in 'ab' for y in 'xy']) 848 | 849 | a = [x for x in range(10)] 850 | b = (x for x in (y for y in a)) 851 | self.assertEqual(sum(b), sum([x for x in range(10)])) 852 | 853 | self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) 854 | self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) 855 | self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) 856 | self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) 857 | self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) 858 | self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) 859 | self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) 860 | check_syntax_error(self, "foo(x for x in range(10), 100)") 861 | check_syntax_error(self, "foo(100, x for x in range(10))") 862 | 863 | def testComprehensionSpecials(self): 864 | # test for outmost iterable precomputation 865 | x = 10; g = (i for i in range(x)); x = 5 866 | self.assertEqual(len(list(g)), 10) 867 | 868 | # This should hold, since we're only precomputing outmost iterable. 
869 | x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) 870 | x = 5; t = True; 871 | self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) 872 | 873 | # Grammar allows multiple adjacent 'if's in listcomps and genexps, 874 | # even though it's silly. Make sure it works (ifelse broke this.) 875 | self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) 876 | self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) 877 | 878 | # verify unpacking single element tuples in listcomp/genexp. 879 | self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) 880 | self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) 881 | 882 | def test_with_statement(self): 883 | class manager(object): 884 | def __enter__(self): 885 | return (1, 2) 886 | def __exit__(self, *args): 887 | pass 888 | 889 | with manager(): 890 | pass 891 | with manager() as x: 892 | pass 893 | with manager() as (x, y): 894 | pass 895 | with manager(), manager(): 896 | pass 897 | with manager() as x, manager() as y: 898 | pass 899 | with manager() as x, manager(): 900 | pass 901 | 902 | def testIfElseExpr(self): 903 | # Test ifelse expressions in various cases 904 | def _checkeval(msg, ret): 905 | "helper to check that evaluation of expressions is done correctly" 906 | print x 907 | return ret 908 | 909 | self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) 910 | self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) 911 | self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) 912 | self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) 913 | self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) 914 | self.assertEqual((5 and 6 if 0 else 1), 1) 915 | self.assertEqual(((5 and 6) if 0 else 1), 1) 916 | self.assertEqual((5 and (6 if 1 else 1)), 6) 917 | self.assertEqual((0 or _checkeval("check 3", 2) 
if 0 else 3), 3) 918 | self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) 919 | self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) 920 | self.assertEqual((not 5 if 1 else 1), False) 921 | self.assertEqual((not 5 if 0 else 1), 1) 922 | self.assertEqual((6 + 1 if 1 else 2), 7) 923 | self.assertEqual((6 - 1 if 1 else 2), 5) 924 | self.assertEqual((6 * 2 if 1 else 4), 12) 925 | self.assertEqual((6 / 2 if 1 else 3), 3) 926 | self.assertEqual((6 < 4 if 0 else 2), 2) 927 | 928 | def testStringLiterals(self): 929 | x = ''; y = ""; self.assert_(len(x) == 0 and x == y) 930 | x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) 931 | x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) 932 | x = "doesn't \"shrink\" does it" 933 | y = 'doesn\'t "shrink" does it' 934 | self.assert_(len(x) == 24 and x == y) 935 | x = "does \"shrink\" doesn't it" 936 | y = 'does "shrink" doesn\'t it' 937 | self.assert_(len(x) == 24 and x == y) 938 | x = """ 939 | The "quick" 940 | brown fox 941 | jumps over 942 | the 'lazy' dog. 943 | """ 944 | y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' 945 | self.assertEquals(x, y) 946 | y = ''' 947 | The "quick" 948 | brown fox 949 | jumps over 950 | the 'lazy' dog. 
951 | ''' 952 | self.assertEquals(x, y) 953 | y = "\n\ 954 | The \"quick\"\n\ 955 | brown fox\n\ 956 | jumps over\n\ 957 | the 'lazy' dog.\n\ 958 | " 959 | self.assertEquals(x, y) 960 | y = '\n\ 961 | The \"quick\"\n\ 962 | brown fox\n\ 963 | jumps over\n\ 964 | the \'lazy\' dog.\n\ 965 | ' 966 | self.assertEquals(x, y) 967 | 968 | 969 | 970 | def test_main(): 971 | run_unittest(TokenTests, GrammarTests) 972 | 973 | if __name__ == '__main__': 974 | test_main() 975 | 976 | -------------------------------------------------------------------------------- /examples/python3-grammar.py: -------------------------------------------------------------------------------- 1 | # Python test set -- part 1, grammar. 2 | # This just tests whether the parser accepts them all. 3 | 4 | # NOTE: When you run this test as a script from the command line, you 5 | # get warnings about certain hex/oct constants. Since those are 6 | # issued by the parser, you can't suppress them by adding a 7 | # filterwarnings() call to this module. Therefore, to shut up the 8 | # regression test, the filterwarnings() call has been added to 9 | # regrtest.py. 
10 | 11 | from test.support import run_unittest, check_syntax_error 12 | import unittest 13 | import sys 14 | # testing import * 15 | from sys import * 16 | 17 | class TokenTests(unittest.TestCase): 18 | 19 | def testBackslash(self): 20 | # Backslash means line continuation: 21 | x = 1 \ 22 | + 1 23 | self.assertEquals(x, 2, 'backslash for line continuation') 24 | 25 | # Backslash does not means continuation in comments :\ 26 | x = 0 27 | self.assertEquals(x, 0, 'backslash ending comment') 28 | 29 | def testPlainIntegers(self): 30 | self.assertEquals(type(000), type(0)) 31 | self.assertEquals(0xff, 255) 32 | self.assertEquals(0o377, 255) 33 | self.assertEquals(2147483647, 0o17777777777) 34 | self.assertEquals(0b1001, 9) 35 | # "0x" is not a valid literal 36 | self.assertRaises(SyntaxError, eval, "0x") 37 | from sys import maxsize 38 | if maxsize == 2147483647: 39 | self.assertEquals(-2147483647-1, -0o20000000000) 40 | # XXX -2147483648 41 | self.assert_(0o37777777777 > 0) 42 | self.assert_(0xffffffff > 0) 43 | self.assert_(0b1111111111111111111111111111111 > 0) 44 | for s in ('2147483648', '0o40000000000', '0x100000000', 45 | '0b10000000000000000000000000000000'): 46 | try: 47 | x = eval(s) 48 | except OverflowError: 49 | self.fail("OverflowError on huge integer literal %r" % s) 50 | elif maxsize == 9223372036854775807: 51 | self.assertEquals(-9223372036854775807-1, -0o1000000000000000000000) 52 | self.assert_(0o1777777777777777777777 > 0) 53 | self.assert_(0xffffffffffffffff > 0) 54 | self.assert_(0b11111111111111111111111111111111111111111111111111111111111111 > 0) 55 | for s in '9223372036854775808', '0o2000000000000000000000', \ 56 | '0x10000000000000000', \ 57 | '0b100000000000000000000000000000000000000000000000000000000000000': 58 | try: 59 | x = eval(s) 60 | except OverflowError: 61 | self.fail("OverflowError on huge integer literal %r" % s) 62 | else: 63 | self.fail('Weird maxsize value %r' % maxsize) 64 | 65 | def testLongIntegers(self): 66 | x = 0 67 | x 
= 0xffffffffffffffff 68 | x = 0Xffffffffffffffff 69 | x = 0o77777777777777777 70 | x = 0O77777777777777777 71 | x = 123456789012345678901234567890 72 | x = 0b100000000000000000000000000000000000000000000000000000000000000000000 73 | x = 0B111111111111111111111111111111111111111111111111111111111111111111111 74 | 75 | def testUnderscoresInNumbers(self): 76 | # Integers 77 | x = 1_0 78 | x = 123_456_7_89 79 | x = 0xabc_123_4_5 80 | x = 0X_abc_123 81 | x = 0B11_01 82 | x = 0b_11_01 83 | x = 0o45_67 84 | x = 0O_45_67 85 | 86 | # Floats 87 | x = 3_1.4 88 | x = 03_1.4 89 | x = 3_1. 90 | x = .3_1 91 | x = 3.1_4 92 | x = 0_3.1_4 93 | x = 3e1_4 94 | x = 3_1e+4_1 95 | x = 3_1E-4_1 96 | 97 | def testFloats(self): 98 | x = 3.14 99 | x = 314. 100 | x = 0.314 101 | # XXX x = 000.314 102 | x = .314 103 | x = 3e14 104 | x = 3E14 105 | x = 3e-14 106 | x = 3e+14 107 | x = 3.e14 108 | x = .3e14 109 | x = 3.1e4 110 | 111 | def testEllipsis(self): 112 | x = ... 113 | self.assert_(x is Ellipsis) 114 | self.assertRaises(SyntaxError, eval, ".. 
.") 115 | 116 | class GrammarTests(unittest.TestCase): 117 | 118 | # single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 119 | # XXX can't test in a script -- this rule is only used when interactive 120 | 121 | # file_input: (NEWLINE | stmt)* ENDMARKER 122 | # Being tested as this very moment this very module 123 | 124 | # expr_input: testlist NEWLINE 125 | # XXX Hard to test -- used only in calls to input() 126 | 127 | def testEvalInput(self): 128 | # testlist ENDMARKER 129 | x = eval('1, 0 or 1') 130 | 131 | def testFuncdef(self): 132 | ### [decorators] 'def' NAME parameters ['->' test] ':' suite 133 | ### decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 134 | ### decorators: decorator+ 135 | ### parameters: '(' [typedargslist] ')' 136 | ### typedargslist: ((tfpdef ['=' test] ',')* 137 | ### ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) 138 | ### | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) 139 | ### tfpdef: NAME [':' test] 140 | ### varargslist: ((vfpdef ['=' test] ',')* 141 | ### ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) 142 | ### | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) 143 | ### vfpdef: NAME 144 | def f1(): pass 145 | f1() 146 | f1(*()) 147 | f1(*(), **{}) 148 | def f2(one_argument): pass 149 | def f3(two, arguments): pass 150 | self.assertEquals(f2.__code__.co_varnames, ('one_argument',)) 151 | self.assertEquals(f3.__code__.co_varnames, ('two', 'arguments')) 152 | def a1(one_arg,): pass 153 | def a2(two, args,): pass 154 | def v0(*rest): pass 155 | def v1(a, *rest): pass 156 | def v2(a, b, *rest): pass 157 | 158 | f1() 159 | f2(1) 160 | f2(1,) 161 | f3(1, 2) 162 | f3(1, 2,) 163 | v0() 164 | v0(1) 165 | v0(1,) 166 | v0(1,2) 167 | v0(1,2,3,4,5,6,7,8,9,0) 168 | v1(1) 169 | v1(1,) 170 | v1(1,2) 171 | v1(1,2,3) 172 | v1(1,2,3,4,5,6,7,8,9,0) 173 | v2(1,2) 174 | v2(1,2,3) 175 | v2(1,2,3,4) 176 | v2(1,2,3,4,5,6,7,8,9,0) 177 | 178 | def d01(a=1): pass 179 | d01() 180 | d01(1) 
181 | d01(*(1,)) 182 | d01(**{'a':2}) 183 | def d11(a, b=1): pass 184 | d11(1) 185 | d11(1, 2) 186 | d11(1, **{'b':2}) 187 | def d21(a, b, c=1): pass 188 | d21(1, 2) 189 | d21(1, 2, 3) 190 | d21(*(1, 2, 3)) 191 | d21(1, *(2, 3)) 192 | d21(1, 2, *(3,)) 193 | d21(1, 2, **{'c':3}) 194 | def d02(a=1, b=2): pass 195 | d02() 196 | d02(1) 197 | d02(1, 2) 198 | d02(*(1, 2)) 199 | d02(1, *(2,)) 200 | d02(1, **{'b':2}) 201 | d02(**{'a': 1, 'b': 2}) 202 | def d12(a, b=1, c=2): pass 203 | d12(1) 204 | d12(1, 2) 205 | d12(1, 2, 3) 206 | def d22(a, b, c=1, d=2): pass 207 | d22(1, 2) 208 | d22(1, 2, 3) 209 | d22(1, 2, 3, 4) 210 | def d01v(a=1, *rest): pass 211 | d01v() 212 | d01v(1) 213 | d01v(1, 2) 214 | d01v(*(1, 2, 3, 4)) 215 | d01v(*(1,)) 216 | d01v(**{'a':2}) 217 | def d11v(a, b=1, *rest): pass 218 | d11v(1) 219 | d11v(1, 2) 220 | d11v(1, 2, 3) 221 | def d21v(a, b, c=1, *rest): pass 222 | d21v(1, 2) 223 | d21v(1, 2, 3) 224 | d21v(1, 2, 3, 4) 225 | d21v(*(1, 2, 3, 4)) 226 | d21v(1, 2, **{'c': 3}) 227 | def d02v(a=1, b=2, *rest): pass 228 | d02v() 229 | d02v(1) 230 | d02v(1, 2) 231 | d02v(1, 2, 3) 232 | d02v(1, *(2, 3, 4)) 233 | d02v(**{'a': 1, 'b': 2}) 234 | def d12v(a, b=1, c=2, *rest): pass 235 | d12v(1) 236 | d12v(1, 2) 237 | d12v(1, 2, 3) 238 | d12v(1, 2, 3, 4) 239 | d12v(*(1, 2, 3, 4)) 240 | d12v(1, 2, *(3, 4, 5)) 241 | d12v(1, *(2,), **{'c': 3}) 242 | def d22v(a, b, c=1, d=2, *rest): pass 243 | d22v(1, 2) 244 | d22v(1, 2, 3) 245 | d22v(1, 2, 3, 4) 246 | d22v(1, 2, 3, 4, 5) 247 | d22v(*(1, 2, 3, 4)) 248 | d22v(1, 2, *(3, 4, 5)) 249 | d22v(1, *(2, 3), **{'d': 4}) 250 | 251 | # keyword argument type tests 252 | try: 253 | str('x', **{b'foo':1 }) 254 | except TypeError: 255 | pass 256 | else: 257 | self.fail('Bytes should not work as keyword argument names') 258 | # keyword only argument tests 259 | def pos0key1(*, key): return key 260 | pos0key1(key=100) 261 | def pos2key2(p1, p2, *, k1, k2=100): return p1,p2,k1,k2 262 | pos2key2(1, 2, k1=100) 263 | pos2key2(1, 2, k1=100, 
k2=200) 264 | pos2key2(1, 2, k2=100, k1=200) 265 | def pos2key2dict(p1, p2, *, k1=100, k2, **kwarg): return p1,p2,k1,k2,kwarg 266 | pos2key2dict(1,2,k2=100,tokwarg1=100,tokwarg2=200) 267 | pos2key2dict(1,2,tokwarg1=100,tokwarg2=200, k2=100) 268 | 269 | # keyword arguments after *arglist 270 | def f(*args, **kwargs): 271 | return args, kwargs 272 | self.assertEquals(f(1, x=2, *[3, 4], y=5), ((1, 3, 4), 273 | {'x':2, 'y':5})) 274 | self.assertRaises(SyntaxError, eval, "f(1, *(2,3), 4)") 275 | self.assertRaises(SyntaxError, eval, "f(1, x=2, *(3,4), x=5)") 276 | 277 | # argument annotation tests 278 | def f(x) -> list: pass 279 | self.assertEquals(f.__annotations__, {'return': list}) 280 | def f(x:int): pass 281 | self.assertEquals(f.__annotations__, {'x': int}) 282 | def f(*x:str): pass 283 | self.assertEquals(f.__annotations__, {'x': str}) 284 | def f(**x:float): pass 285 | self.assertEquals(f.__annotations__, {'x': float}) 286 | def f(x, y:1+2): pass 287 | self.assertEquals(f.__annotations__, {'y': 3}) 288 | def f(a, b:1, c:2, d): pass 289 | self.assertEquals(f.__annotations__, {'b': 1, 'c': 2}) 290 | def f(a, b:1, c:2, d, e:3=4, f=5, *g:6): pass 291 | self.assertEquals(f.__annotations__, 292 | {'b': 1, 'c': 2, 'e': 3, 'g': 6}) 293 | def f(a, b:1, c:2, d, e:3=4, f=5, *g:6, h:7, i=8, j:9=10, 294 | **k:11) -> 12: pass 295 | self.assertEquals(f.__annotations__, 296 | {'b': 1, 'c': 2, 'e': 3, 'g': 6, 'h': 7, 'j': 9, 297 | 'k': 11, 'return': 12}) 298 | # Check for SF Bug #1697248 - mixing decorators and a return annotation 299 | def null(x): return x 300 | @null 301 | def f(x) -> list: pass 302 | self.assertEquals(f.__annotations__, {'return': list}) 303 | 304 | # test closures with a variety of oparg's 305 | closure = 1 306 | def f(): return closure 307 | def f(x=1): return closure 308 | def f(*, k=1): return closure 309 | def f() -> int: return closure 310 | 311 | # Check ast errors in *args and *kwargs 312 | check_syntax_error(self, "f(*g(1=2))") 313 | 
check_syntax_error(self, "f(**g(1=2))") 314 | 315 | def testLambdef(self): 316 | ### lambdef: 'lambda' [varargslist] ':' test 317 | l1 = lambda : 0 318 | self.assertEquals(l1(), 0) 319 | l2 = lambda : a[d] # XXX just testing the expression 320 | l3 = lambda : [2 < x for x in [-1, 3, 0]] 321 | self.assertEquals(l3(), [0, 1, 0]) 322 | l4 = lambda x = lambda y = lambda z=1 : z : y() : x() 323 | self.assertEquals(l4(), 1) 324 | l5 = lambda x, y, z=2: x + y + z 325 | self.assertEquals(l5(1, 2), 5) 326 | self.assertEquals(l5(1, 2, 3), 6) 327 | check_syntax_error(self, "lambda x: x = 2") 328 | check_syntax_error(self, "lambda (None,): None") 329 | l6 = lambda x, y, *, k=20: x+y+k 330 | self.assertEquals(l6(1,2), 1+2+20) 331 | self.assertEquals(l6(1,2,k=10), 1+2+10) 332 | 333 | 334 | ### stmt: simple_stmt | compound_stmt 335 | # Tested below 336 | 337 | def testSimpleStmt(self): 338 | ### simple_stmt: small_stmt (';' small_stmt)* [';'] 339 | x = 1; pass; del x 340 | def foo(): 341 | # verify statements that end with semi-colons 342 | x = 1; pass; del x; 343 | foo() 344 | 345 | ### small_stmt: expr_stmt | pass_stmt | del_stmt | flow_stmt | import_stmt | global_stmt | access_stmt 346 | # Tested below 347 | 348 | def testExprStmt(self): 349 | # (exprlist '=')* exprlist 350 | 1 351 | 1, 2, 3 352 | x = 1 353 | x = 1, 2, 3 354 | x = y = z = 1, 2, 3 355 | x, y, z = 1, 2, 3 356 | abc = a, b, c = x, y, z = xyz = 1, 2, (3, 4) 357 | 358 | check_syntax_error(self, "x + 1 = 1") 359 | check_syntax_error(self, "a + 1 = b + 2") 360 | 361 | def testDelStmt(self): 362 | # 'del' exprlist 363 | abc = [1,2,3] 364 | x, y, z = abc 365 | xyz = x, y, z 366 | 367 | del abc 368 | del x, y, (z, xyz) 369 | 370 | def testPassStmt(self): 371 | # 'pass' 372 | pass 373 | 374 | # flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt 375 | # Tested below 376 | 377 | def testBreakStmt(self): 378 | # 'break' 379 | while 1: break 380 | 381 | def testContinueStmt(self): 382 | # 'continue' 383 | i = 1 
384 | while i: i = 0; continue 385 | 386 | msg = "" 387 | while not msg: 388 | msg = "ok" 389 | try: 390 | continue 391 | msg = "continue failed to continue inside try" 392 | except: 393 | msg = "continue inside try called except block" 394 | if msg != "ok": 395 | self.fail(msg) 396 | 397 | msg = "" 398 | while not msg: 399 | msg = "finally block not called" 400 | try: 401 | continue 402 | finally: 403 | msg = "ok" 404 | if msg != "ok": 405 | self.fail(msg) 406 | 407 | def test_break_continue_loop(self): 408 | # This test warrants an explanation. It is a test specifically for SF bugs 409 | # #463359 and #462937. The bug is that a 'break' statement executed or 410 | # exception raised inside a try/except inside a loop, *after* a continue 411 | # statement has been executed in that loop, will cause the wrong number of 412 | # arguments to be popped off the stack and the instruction pointer reset to 413 | # a very small number (usually 0.) Because of this, the following test 414 | # *must* be written as a function, and the tracking vars *must* be function 415 | # arguments with default values. Otherwise, the test will loop and loop.
416 | 417 | def test_inner(extra_burning_oil = 1, count=0): 418 | big_hippo = 2 419 | while big_hippo: 420 | count += 1 421 | try: 422 | if extra_burning_oil and big_hippo == 1: 423 | extra_burning_oil -= 1 424 | break 425 | big_hippo -= 1 426 | continue 427 | except: 428 | raise 429 | if count > 2 or big_hippo != 1: 430 | self.fail("continue then break in try/except in loop broken!") 431 | test_inner() 432 | 433 | def testReturn(self): 434 | # 'return' [testlist] 435 | def g1(): return 436 | def g2(): return 1 437 | g1() 438 | x = g2() 439 | check_syntax_error(self, "class foo:return 1") 440 | 441 | def testYield(self): 442 | check_syntax_error(self, "class foo:yield 1") 443 | 444 | def testRaise(self): 445 | # 'raise' test [',' test] 446 | try: raise RuntimeError('just testing') 447 | except RuntimeError: pass 448 | try: raise KeyboardInterrupt 449 | except KeyboardInterrupt: pass 450 | 451 | def testImport(self): 452 | # 'import' dotted_as_names 453 | import sys 454 | import time, sys 455 | # 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names) 456 | from time import time 457 | from time import (time) 458 | # not testable inside a function, but already done at top of the module 459 | # from sys import * 460 | from sys import path, argv 461 | from sys import (path, argv) 462 | from sys import (path, argv,) 463 | 464 | def testGlobal(self): 465 | # 'global' NAME (',' NAME)* 466 | global a 467 | global a, b 468 | global one, two, three, four, five, six, seven, eight, nine, ten 469 | 470 | def testNonlocal(self): 471 | # 'nonlocal' NAME (',' NAME)* 472 | x = 0 473 | y = 0 474 | def f(): 475 | nonlocal x 476 | nonlocal x, y 477 | 478 | def testAssert(self): 479 | # assert_stmt: 'assert' test [',' test] 480 | assert 1 481 | assert 1, 1 482 | assert lambda x:x 483 | assert 1, lambda x:x+1 484 | try: 485 | assert 0, "msg" 486 | except AssertionError as e: 487 | self.assertEquals(e.args[0], "msg") 488 | else: 489 | if __debug__: 490 | 
self.fail("AssertionError not raised by assert 0") 491 | 492 | ### compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef 493 | # Tested below 494 | 495 | def testIf(self): 496 | # 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 497 | if 1: pass 498 | if 1: pass 499 | else: pass 500 | if 0: pass 501 | elif 0: pass 502 | if 0: pass 503 | elif 0: pass 504 | elif 0: pass 505 | elif 0: pass 506 | else: pass 507 | 508 | def testWhile(self): 509 | # 'while' test ':' suite ['else' ':' suite] 510 | while 0: pass 511 | while 0: pass 512 | else: pass 513 | 514 | # Issue1920: "while 0" is optimized away, 515 | # ensure that the "else" clause is still present. 516 | x = 0 517 | while 0: 518 | x = 1 519 | else: 520 | x = 2 521 | self.assertEquals(x, 2) 522 | 523 | def testFor(self): 524 | # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite] 525 | for i in 1, 2, 3: pass 526 | for i, j, k in (): pass 527 | else: pass 528 | class Squares: 529 | def __init__(self, max): 530 | self.max = max 531 | self.sofar = [] 532 | def __len__(self): return len(self.sofar) 533 | def __getitem__(self, i): 534 | if not 0 <= i < self.max: raise IndexError 535 | n = len(self.sofar) 536 | while n <= i: 537 | self.sofar.append(n*n) 538 | n = n+1 539 | return self.sofar[i] 540 | n = 0 541 | for x in Squares(10): n = n+x 542 | if n != 285: 543 | self.fail('for over growing sequence') 544 | 545 | result = [] 546 | for x, in [(1,), (2,), (3,)]: 547 | result.append(x) 548 | self.assertEqual(result, [1, 2, 3]) 549 | 550 | def testTry(self): 551 | ### try_stmt: 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite] 552 | ### | 'try' ':' suite 'finally' ':' suite 553 | ### except_clause: 'except' [expr ['as' expr]] 554 | try: 555 | 1/0 556 | except ZeroDivisionError: 557 | pass 558 | else: 559 | pass 560 | try: 1/0 561 | except EOFError: pass 562 | except TypeError as msg: pass 563 | except RuntimeError as msg: pass 564 | except: pass 565 | else: pass 566 
| try: 1/0 567 | except (EOFError, TypeError, ZeroDivisionError): pass 568 | try: 1/0 569 | except (EOFError, TypeError, ZeroDivisionError) as msg: pass 570 | try: pass 571 | finally: pass 572 | 573 | def testSuite(self): 574 | # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT 575 | if 1: pass 576 | if 1: 577 | pass 578 | if 1: 579 | # 580 | # 581 | # 582 | pass 583 | pass 584 | # 585 | pass 586 | # 587 | 588 | def testTest(self): 589 | ### and_test ('or' and_test)* 590 | ### and_test: not_test ('and' not_test)* 591 | ### not_test: 'not' not_test | comparison 592 | if not 1: pass 593 | if 1 and 1: pass 594 | if 1 or 1: pass 595 | if not not not 1: pass 596 | if not 1 and 1 and 1: pass 597 | if 1 and 1 or 1 and 1 and 1 or not 1 and 1: pass 598 | 599 | def testComparison(self): 600 | ### comparison: expr (comp_op expr)* 601 | ### comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' 602 | if 1: pass 603 | x = (1 == 1) 604 | if 1 == 1: pass 605 | if 1 != 1: pass 606 | if 1 < 1: pass 607 | if 1 > 1: pass 608 | if 1 <= 1: pass 609 | if 1 >= 1: pass 610 | if 1 is 1: pass 611 | if 1 is not 1: pass 612 | if 1 in (): pass 613 | if 1 not in (): pass 614 | if 1 < 1 > 1 == 1 >= 1 <= 1 != 1 in 1 not in 1 is 1 is not 1: pass 615 | 616 | def testBinaryMaskOps(self): 617 | x = 1 & 1 618 | x = 1 ^ 1 619 | x = 1 | 1 620 | 621 | def testShiftOps(self): 622 | x = 1 << 1 623 | x = 1 >> 1 624 | x = 1 << 1 >> 1 625 | 626 | def testAdditiveOps(self): 627 | x = 1 628 | x = 1 + 1 629 | x = 1 - 1 - 1 630 | x = 1 - 1 + 1 - 1 + 1 631 | 632 | def testMultiplicativeOps(self): 633 | x = 1 * 1 634 | x = 1 / 1 635 | x = 1 % 1 636 | x = 1 / 1 * 1 % 1 637 | 638 | def testUnaryOps(self): 639 | x = +1 640 | x = -1 641 | x = ~1 642 | x = ~1 ^ 1 & 1 | 1 & 1 ^ -1 643 | x = -1*1/1 + 1*1 - ---1*1 644 | 645 | def testSelectors(self): 646 | ### trailer: '(' [testlist] ')' | '[' subscript ']' | '.' 
NAME 647 | ### subscript: expr | [expr] ':' [expr] 648 | 649 | import sys, time 650 | c = sys.path[0] 651 | x = time.time() 652 | x = sys.modules['time'].time() 653 | a = '01234' 654 | c = a[0] 655 | c = a[-1] 656 | s = a[0:5] 657 | s = a[:5] 658 | s = a[0:] 659 | s = a[:] 660 | s = a[-5:] 661 | s = a[:-1] 662 | s = a[-4:-3] 663 | # A rough test of SF bug 1333982. http://python.org/sf/1333982 664 | # The testing here is fairly incomplete. 665 | # Test cases should include: commas with 1 and 2 colons 666 | d = {} 667 | d[1] = 1 668 | d[1,] = 2 669 | d[1,2] = 3 670 | d[1,2,3] = 4 671 | L = list(d) 672 | L.sort(key=lambda x: x if isinstance(x, tuple) else ()) 673 | self.assertEquals(str(L), '[1, (1,), (1, 2), (1, 2, 3)]') 674 | 675 | def testAtoms(self): 676 | ### atom: '(' [testlist] ')' | '[' [testlist] ']' | '{' [dictsetmaker] '}' | NAME | NUMBER | STRING 677 | ### dictsetmaker: (test ':' test (',' test ':' test)* [',']) | (test (',' test)* [',']) 678 | 679 | x = (1) 680 | x = (1 or 2 or 3) 681 | x = (1 or 2 or 3, 2, 3) 682 | 683 | x = [] 684 | x = [1] 685 | x = [1 or 2 or 3] 686 | x = [1 or 2 or 3, 2, 3] 687 | x = [] 688 | 689 | x = {} 690 | x = {'one': 1} 691 | x = {'one': 1,} 692 | x = {'one' or 'two': 1 or 2} 693 | x = {'one': 1, 'two': 2} 694 | x = {'one': 1, 'two': 2,} 695 | x = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6} 696 | 697 | x = {'one'} 698 | x = {'one', 1,} 699 | x = {'one', 'two', 'three'} 700 | x = {2, 3, 4,} 701 | 702 | x = x 703 | x = 'x' 704 | x = 123 705 | 706 | ### exprlist: expr (',' expr)* [','] 707 | ### testlist: test (',' test)* [','] 708 | # These have been exercised enough above 709 | 710 | def testClassdef(self): 711 | # 'class' NAME ['(' [testlist] ')'] ':' suite 712 | class B: pass 713 | class B2(): pass 714 | class C1(B): pass 715 | class C2(B): pass 716 | class D(C1, C2, B): pass 717 | class C: 718 | def meth1(self): pass 719 | def meth2(self, arg): pass 720 | def meth3(self, a1, a2): pass 721 | 722 | # 
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 723 | # decorators: decorator+ 724 | # decorated: decorators (classdef | funcdef) 725 | def class_decorator(x): return x 726 | @class_decorator 727 | class G: pass 728 | 729 | def testDictcomps(self): 730 | # dictorsetmaker: ( (test ':' test (comp_for | 731 | # (',' test ':' test)* [','])) | 732 | # (test (comp_for | (',' test)* [','])) ) 733 | nums = [1, 2, 3] 734 | self.assertEqual({i:i+1 for i in nums}, {1: 2, 2: 3, 3: 4}) 735 | 736 | def testListcomps(self): 737 | # list comprehension tests 738 | nums = [1, 2, 3, 4, 5] 739 | strs = ["Apple", "Banana", "Coconut"] 740 | spcs = [" Apple", " Banana ", "Coco nut "] 741 | 742 | self.assertEqual([s.strip() for s in spcs], ['Apple', 'Banana', 'Coco nut']) 743 | self.assertEqual([3 * x for x in nums], [3, 6, 9, 12, 15]) 744 | self.assertEqual([x for x in nums if x > 2], [3, 4, 5]) 745 | self.assertEqual([(i, s) for i in nums for s in strs], 746 | [(1, 'Apple'), (1, 'Banana'), (1, 'Coconut'), 747 | (2, 'Apple'), (2, 'Banana'), (2, 'Coconut'), 748 | (3, 'Apple'), (3, 'Banana'), (3, 'Coconut'), 749 | (4, 'Apple'), (4, 'Banana'), (4, 'Coconut'), 750 | (5, 'Apple'), (5, 'Banana'), (5, 'Coconut')]) 751 | self.assertEqual([(i, s) for i in nums for s in [f for f in strs if "n" in f]], 752 | [(1, 'Banana'), (1, 'Coconut'), (2, 'Banana'), (2, 'Coconut'), 753 | (3, 'Banana'), (3, 'Coconut'), (4, 'Banana'), (4, 'Coconut'), 754 | (5, 'Banana'), (5, 'Coconut')]) 755 | self.assertEqual([(lambda a:[a**i for i in range(a+1)])(j) for j in range(5)], 756 | [[1], [1, 1], [1, 2, 4], [1, 3, 9, 27], [1, 4, 16, 64, 256]]) 757 | 758 | def test_in_func(l): 759 | return [0 < x < 3 for x in l if x > 2] 760 | 761 | self.assertEqual(test_in_func(nums), [False, False, False]) 762 | 763 | def test_nested_front(): 764 | self.assertEqual([[y for y in [x, x + 1]] for x in [1,3,5]], 765 | [[1, 2], [3, 4], [5, 6]]) 766 | 767 | test_nested_front() 768 | 769 | check_syntax_error(self, "[i, s for i in 
nums for s in strs]") 770 | check_syntax_error(self, "[x if y]") 771 | 772 | suppliers = [ 773 | (1, "Boeing"), 774 | (2, "Ford"), 775 | (3, "Macdonalds") 776 | ] 777 | 778 | parts = [ 779 | (10, "Airliner"), 780 | (20, "Engine"), 781 | (30, "Cheeseburger") 782 | ] 783 | 784 | suppart = [ 785 | (1, 10), (1, 20), (2, 20), (3, 30) 786 | ] 787 | 788 | x = [ 789 | (sname, pname) 790 | for (sno, sname) in suppliers 791 | for (pno, pname) in parts 792 | for (sp_sno, sp_pno) in suppart 793 | if sno == sp_sno and pno == sp_pno 794 | ] 795 | 796 | self.assertEqual(x, [('Boeing', 'Airliner'), ('Boeing', 'Engine'), ('Ford', 'Engine'), 797 | ('Macdonalds', 'Cheeseburger')]) 798 | 799 | def testGenexps(self): 800 | # generator expression tests 801 | g = ([x for x in range(10)] for x in range(1)) 802 | self.assertEqual(next(g), [x for x in range(10)]) 803 | try: 804 | next(g) 805 | self.fail('should produce StopIteration exception') 806 | except StopIteration: 807 | pass 808 | 809 | a = 1 810 | try: 811 | g = (a for d in a) 812 | next(g) 813 | self.fail('should produce TypeError') 814 | except TypeError: 815 | pass 816 | 817 | self.assertEqual(list((x, y) for x in 'abcd' for y in 'abcd'), [(x, y) for x in 'abcd' for y in 'abcd']) 818 | self.assertEqual(list((x, y) for x in 'ab' for y in 'xy'), [(x, y) for x in 'ab' for y in 'xy']) 819 | 820 | a = [x for x in range(10)] 821 | b = (x for x in (y for y in a)) 822 | self.assertEqual(sum(b), sum([x for x in range(10)])) 823 | 824 | self.assertEqual(sum(x**2 for x in range(10)), sum([x**2 for x in range(10)])) 825 | self.assertEqual(sum(x*x for x in range(10) if x%2), sum([x*x for x in range(10) if x%2])) 826 | self.assertEqual(sum(x for x in (y for y in range(10))), sum([x for x in range(10)])) 827 | self.assertEqual(sum(x for x in (y for y in (z for z in range(10)))), sum([x for x in range(10)])) 828 | self.assertEqual(sum(x for x in [y for y in (z for z in range(10))]), sum([x for x in range(10)])) 829 | self.assertEqual(sum(x for 
x in (y for y in (z for z in range(10) if True)) if True), sum([x for x in range(10)])) 830 | self.assertEqual(sum(x for x in (y for y in (z for z in range(10) if True) if False) if True), 0) 831 | check_syntax_error(self, "foo(x for x in range(10), 100)") 832 | check_syntax_error(self, "foo(100, x for x in range(10))") 833 | 834 | def testComprehensionSpecials(self): 835 | # test for outmost iterable precomputation 836 | x = 10; g = (i for i in range(x)); x = 5 837 | self.assertEqual(len(list(g)), 10) 838 | 839 | # This should hold, since we're only precomputing outmost iterable. 840 | x = 10; t = False; g = ((i,j) for i in range(x) if t for j in range(x)) 841 | x = 5; t = True; 842 | self.assertEqual([(i,j) for i in range(10) for j in range(5)], list(g)) 843 | 844 | # Grammar allows multiple adjacent 'if's in listcomps and genexps, 845 | # even though it's silly. Make sure it works (ifelse broke this.) 846 | self.assertEqual([ x for x in range(10) if x % 2 if x % 3 ], [1, 5, 7]) 847 | self.assertEqual(list(x for x in range(10) if x % 2 if x % 3), [1, 5, 7]) 848 | 849 | # verify unpacking single element tuples in listcomp/genexp. 
850 | self.assertEqual([x for x, in [(4,), (5,), (6,)]], [4, 5, 6]) 851 | self.assertEqual(list(x for x, in [(7,), (8,), (9,)]), [7, 8, 9]) 852 | 853 | def test_with_statement(self): 854 | class manager(object): 855 | def __enter__(self): 856 | return (1, 2) 857 | def __exit__(self, *args): 858 | pass 859 | 860 | with manager(): 861 | pass 862 | with manager() as x: 863 | pass 864 | with manager() as (x, y): 865 | pass 866 | with manager(), manager(): 867 | pass 868 | with manager() as x, manager() as y: 869 | pass 870 | with manager() as x, manager(): 871 | pass 872 | 873 | def testIfElseExpr(self): 874 | # Test ifelse expressions in various cases 875 | def _checkeval(msg, ret): 876 | "helper to check that evaluation of expressions is done correctly" 877 | print(x) 878 | return ret 879 | 880 | # the next line is not allowed anymore 881 | #self.assertEqual([ x() for x in lambda: True, lambda: False if x() ], [True]) 882 | self.assertEqual([ x() for x in (lambda: True, lambda: False) if x() ], [True]) 883 | self.assertEqual([ x(False) for x in (lambda x: False if x else True, lambda x: True if x else False) if x(False) ], [True]) 884 | self.assertEqual((5 if 1 else _checkeval("check 1", 0)), 5) 885 | self.assertEqual((_checkeval("check 2", 0) if 0 else 5), 5) 886 | self.assertEqual((5 and 6 if 0 else 1), 1) 887 | self.assertEqual(((5 and 6) if 0 else 1), 1) 888 | self.assertEqual((5 and (6 if 1 else 1)), 6) 889 | self.assertEqual((0 or _checkeval("check 3", 2) if 0 else 3), 3) 890 | self.assertEqual((1 or _checkeval("check 4", 2) if 1 else _checkeval("check 5", 3)), 1) 891 | self.assertEqual((0 or 5 if 1 else _checkeval("check 6", 3)), 5) 892 | self.assertEqual((not 5 if 1 else 1), False) 893 | self.assertEqual((not 5 if 0 else 1), 1) 894 | self.assertEqual((6 + 1 if 1 else 2), 7) 895 | self.assertEqual((6 - 1 if 1 else 2), 5) 896 | self.assertEqual((6 * 2 if 1 else 4), 12) 897 | self.assertEqual((6 / 2 if 1 else 3), 3) 898 | self.assertEqual((6 < 4 if 0 else 2), 2) 
899 | 900 | def testStringLiterals(self): 901 | x = ''; y = ""; self.assert_(len(x) == 0 and x == y) 902 | x = '\''; y = "'"; self.assert_(len(x) == 1 and x == y and ord(x) == 39) 903 | x = '"'; y = "\""; self.assert_(len(x) == 1 and x == y and ord(x) == 34) 904 | x = "doesn't \"shrink\" does it" 905 | y = 'doesn\'t "shrink" does it' 906 | self.assert_(len(x) == 24 and x == y) 907 | x = "does \"shrink\" doesn't it" 908 | y = 'does "shrink" doesn\'t it' 909 | self.assert_(len(x) == 24 and x == y) 910 | x = f""" 911 | The "quick" 912 | brown fo{ok()}x 913 | jumps over 914 | the 'lazy' dog. 915 | """ 916 | y = '\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n' 917 | self.assertEquals(x, y) 918 | y = ''' 919 | The "quick" 920 | brown fox 921 | jumps over 922 | the 'lazy' dog. 923 | ''' 924 | self.assertEquals(x, y) 925 | y = "\n\ 926 | The \"quick\"\n\ 927 | brown fox\n\ 928 | jumps over\n\ 929 | the 'lazy' dog.\n\ 930 | " 931 | self.assertEquals(x, y) 932 | y = '\n\ 933 | The \"quick\"\n\ 934 | brown fox\n\ 935 | jumps over\n\ 936 | the \'lazy\' dog.\n\ 937 | ' 938 | self.assertEquals(x, y) 939 | 940 | 941 | def test_main(): 942 | run_unittest(TokenTests, GrammarTests) 943 | 944 | if __name__ == '__main__': 945 | test_main() 946 | -------------------------------------------------------------------------------- /examples/simple-statements-without-trailing-newline.py: -------------------------------------------------------------------------------- 1 | pass; print "hi" -------------------------------------------------------------------------------- /examples/tabs.py: -------------------------------------------------------------------------------- 1 | def set_password(args): 2 | password = args.password 3 | while not password : 4 | password1 = getpass("" if args.quiet else "Provide password: ") 5 | password_repeat = getpass("" if args.quiet else "Repeat password: ") 6 | if password1 != password_repeat: 7 | print("Passwords do not match, try again") 8 | elif 
len(password1) < 4: 9 | print("Please provide at least 4 characters") 10 | else: 11 | password = password1 12 | 13 | password_hash = passwd(password) 14 | cfg = BaseJSONConfigManager(config_dir=jupyter_config_dir()) 15 | cfg.update('jupyter_notebook_config', { 16 | 'NotebookApp': { 17 | 'password': password_hash, 18 | } 19 | }) 20 | if not args.quiet: 21 | print("password stored in config dir: %s" % jupyter_config_dir()) 22 | 23 | def main(argv): 24 | parser = argparse.ArgumentParser(argv[0]) 25 | subparsers = parser.add_subparsers() 26 | parser_password = subparsers.add_parser('password', help='sets a password for your notebook server') 27 | parser_password.add_argument("password", help="password to set, if not given, a password will be queried for (NOTE: this may not be safe)", 28 | nargs="?") 29 | parser_password.add_argument("--quiet", help="suppress messages", action="store_true") 30 | parser_password.set_defaults(function=set_password) 31 | args = parser.parse_args(argv[1:]) 32 | args.function(args) 33 | -------------------------------------------------------------------------------- /examples/trailing-whitespace.py: -------------------------------------------------------------------------------- 1 | print a 2 | 3 | if b: 4 | if c: 5 | d 6 | e 7 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tree-sitter/tree-sitter-python 2 | 3 | go 1.22 4 | 5 | require github.com/tree-sitter/go-tree-sitter v0.24.0 6 | 7 | require github.com/mattn/go-pointer v0.0.1 // indirect 8 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | 
github.com/mattn/go-pointer v0.0.1 h1:n+XhsuGeVO6MEAp7xyEukFINEa+Quek5psIR/ylA6o0= 4 | github.com/mattn/go-pointer v0.0.1/go.mod h1:2zXcozF6qYGgmsG+SeTZz3oAbFLdD3OWqnUbNvJZAlc= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 8 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | github.com/tree-sitter/go-tree-sitter v0.24.0 h1:kRZb6aBNfcI/u0Qh8XEt3zjNVnmxTisDBN+kXK0xRYQ= 10 | github.com/tree-sitter/go-tree-sitter v0.24.0/go.mod h1:x681iFVoLMEwOSIHA1chaLkXlroXEN7WY+VHGFaoDbk= 11 | github.com/tree-sitter/tree-sitter-c v0.21.5-0.20240818205408-927da1f210eb h1:A8425heRM8mylnv4H58FPUiH+aYivyitre0PzxrfmWs= 12 | github.com/tree-sitter/tree-sitter-c v0.21.5-0.20240818205408-927da1f210eb/go.mod h1:dOF6gtQiF9UwNh995T5OphYmtIypkjsp3ap7r9AN/iA= 13 | github.com/tree-sitter/tree-sitter-cpp v0.22.4-0.20240818224355-b1a4e2b25148 h1:AfFPZwtwGN01BW1jDdqBVqscTwetvMpydqYZz57RSlc= 14 | github.com/tree-sitter/tree-sitter-cpp v0.22.4-0.20240818224355-b1a4e2b25148/go.mod h1:Bh6U3viD57rFXRYIQ+kmiYtr+1Bx0AceypDLJJSyi9s= 15 | github.com/tree-sitter/tree-sitter-embedded-template v0.21.1-0.20240819044651-ffbf64942c33 h1:TwqSV3qLp3tKSqirGLRHnjFk9Tc2oy57LIl+FQ4GjI4= 16 | github.com/tree-sitter/tree-sitter-embedded-template v0.21.1-0.20240819044651-ffbf64942c33/go.mod h1:CvCKCt3v04Ufos1zZnNCelBDeCGRpPucaN8QczoUsN4= 17 | github.com/tree-sitter/tree-sitter-go v0.21.3-0.20240818010209-8c0f0e7a6012 h1:Xvxck3tE5FW7F7bTS97iNM2ADMyCMJztVqn5HYKdJGo= 18 | github.com/tree-sitter/tree-sitter-go v0.21.3-0.20240818010209-8c0f0e7a6012/go.mod h1:T40D0O1cPvUU/+AmiXVXy1cncYQT6wem4Z0g4SfAYvY= 19 | github.com/tree-sitter/tree-sitter-html v0.20.5-0.20240818004741-d11201a263d0 h1:c46K6uh5Dz00zJeU9BfjXdb8I+E4RkUdfnWJpQADXFo= 20 | 
github.com/tree-sitter/tree-sitter-html v0.20.5-0.20240818004741-d11201a263d0/go.mod h1:hcNt/kOJHcIcuMvouE7LJcYdeFUFbVpBJ6d4wmOA+tU= 21 | github.com/tree-sitter/tree-sitter-java v0.21.1-0.20240824015150-576d8097e495 h1:jrt4qbJVEFs4H93/ITxygHc6u0TGqAkkate7TQ4wFSA= 22 | github.com/tree-sitter/tree-sitter-java v0.21.1-0.20240824015150-576d8097e495/go.mod h1:oyaR7fLnRV0hT9z6qwE9GkaeTom/hTDwK3H2idcOJFc= 23 | github.com/tree-sitter/tree-sitter-javascript v0.21.5-0.20240818005344-15887341e5b5 h1:om4X9AVg3asL8gxNJDcz4e/Wp+VpQj1PY3uJXKr6EOg= 24 | github.com/tree-sitter/tree-sitter-javascript v0.21.5-0.20240818005344-15887341e5b5/go.mod h1:nNqgPoV/h9uYWk6kYEFdEAhNVOacpfpRW5SFmdaP4tU= 25 | github.com/tree-sitter/tree-sitter-json v0.21.1-0.20240818005659-bdd69eb8c8a5 h1:pfV3G3k7NCKqKk8THBmyuh2zA33lgYHS3GVrzRR8ry4= 26 | github.com/tree-sitter/tree-sitter-json v0.21.1-0.20240818005659-bdd69eb8c8a5/go.mod h1:GbMKRjLfk0H+PI7nLi1Sx5lHf5wCpLz9al8tQYSxpEk= 27 | github.com/tree-sitter/tree-sitter-php v0.22.9-0.20240819002312-a552625b56c1 h1:ZXZMDwE+IhUtGug4Brv6NjJWUU3rfkZBKpemf6RY8/g= 28 | github.com/tree-sitter/tree-sitter-php v0.22.9-0.20240819002312-a552625b56c1/go.mod h1:UKCLuYnJ312Mei+3cyTmGOHzn0YAnaPRECgJmHtzrqs= 29 | github.com/tree-sitter/tree-sitter-ruby v0.21.1-0.20240818211811-7dbc1e2d0e2d h1:fcYCvoXdcP1uRQYXqJHRy6Hec+uKScQdKVtMwK9JeCI= 30 | github.com/tree-sitter/tree-sitter-ruby v0.21.1-0.20240818211811-7dbc1e2d0e2d/go.mod h1:T1nShQ4v5AJtozZ8YyAS4uzUtDAJj/iv4YfwXSbUHzg= 31 | github.com/tree-sitter/tree-sitter-rust v0.21.3-0.20240818005432-2b43eafe6447 h1:o9alBu1J/WjrcTKEthYtXmdkDc5OVXD+PqlvnEZ0Lzc= 32 | github.com/tree-sitter/tree-sitter-rust v0.21.3-0.20240818005432-2b43eafe6447/go.mod h1:1Oh95COkkTn6Ezp0vcMbvfhRP5gLeqqljR0BYnBzWvc= 33 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 34 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 35 | 
-------------------------------------------------------------------------------- /grammar.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Python grammar for tree-sitter 3 | * @author Max Brunsfeld 4 | * @license MIT 5 | * @see {@link https://docs.python.org/2/reference/grammar.html|Python 2 grammar} 6 | * @see {@link https://docs.python.org/3/reference/grammar.html|Python 3 grammar} 7 | */ 8 | 9 | 10 | /// 11 | // @ts-check 12 | 13 | const PREC = { 14 | // this resolves a conflict between the usage of ':' in a lambda vs in a 15 | // typed parameter. In the case of a lambda, we don't allow typed parameters. 16 | lambda: -2, 17 | typed_parameter: -1, 18 | conditional: -1, 19 | 20 | parenthesized_expression: 1, 21 | parenthesized_list_splat: 1, 22 | or: 10, 23 | and: 11, 24 | not: 12, 25 | compare: 13, 26 | bitwise_or: 14, 27 | bitwise_and: 15, 28 | xor: 16, 29 | shift: 17, 30 | plus: 18, 31 | times: 19, 32 | unary: 20, 33 | power: 21, 34 | call: 22, 35 | }; 36 | 37 | const SEMICOLON = ';'; 38 | 39 | module.exports = grammar({ 40 | name: 'python', 41 | 42 | extras: $ => [ 43 | $.comment, 44 | /[\s\f\uFEFF\u2060\u200B]|\r?\n/, 45 | $.line_continuation, 46 | ], 47 | 48 | conflicts: $ => [ 49 | [$.primary_expression, $.pattern], 50 | [$.primary_expression, $.list_splat_pattern], 51 | [$.tuple, $.tuple_pattern], 52 | [$.list, $.list_pattern], 53 | [$.with_item, $._collection_elements], 54 | [$.named_expression, $.as_pattern], 55 | [$.print_statement, $.primary_expression], 56 | [$.type_alias_statement, $.primary_expression], 57 | [$.match_statement, $.primary_expression], 58 | ], 59 | 60 | supertypes: $ => [ 61 | $._simple_statement, 62 | $._compound_statement, 63 | $.expression, 64 | $.primary_expression, 65 | $.pattern, 66 | $.parameter, 67 | ], 68 | 69 | externals: $ => [ 70 | $._newline, 71 | $._indent, 72 | $._dedent, 73 | $.string_start, 74 | $._string_content, 75 | $.escape_interpolation, 76 | $.string_end, 
77 | 78 | // Mark comments as external tokens so that the external scanner is always 79 | // invoked, even if no external token is expected. This allows for better 80 | // error recovery, because the external scanner can maintain the overall 81 | // structure by returning dedent tokens whenever a dedent occurs, even 82 | // if no dedent is expected. 83 | $.comment, 84 | 85 | // Allow the external scanner to check for the validity of closing brackets 86 | // so that it can avoid returning dedent tokens between brackets. 87 | ']', 88 | ')', 89 | '}', 90 | 'except', 91 | ], 92 | 93 | inline: $ => [ 94 | $._simple_statement, 95 | $._compound_statement, 96 | $._suite, 97 | $._expressions, 98 | $._left_hand_side, 99 | $.keyword_identifier, 100 | ], 101 | 102 | word: $ => $.identifier, 103 | 104 | rules: { 105 | module: $ => repeat($._statement), 106 | 107 | _statement: $ => choice( 108 | $._simple_statements, 109 | $._compound_statement, 110 | ), 111 | 112 | // Simple statements 113 | 114 | _simple_statements: $ => seq( 115 | sep1($._simple_statement, SEMICOLON), 116 | optional(SEMICOLON), 117 | $._newline, 118 | ), 119 | 120 | _simple_statement: $ => choice( 121 | $.future_import_statement, 122 | $.import_statement, 123 | $.import_from_statement, 124 | $.print_statement, 125 | $.assert_statement, 126 | $.expression_statement, 127 | $.return_statement, 128 | $.delete_statement, 129 | $.raise_statement, 130 | $.pass_statement, 131 | $.break_statement, 132 | $.continue_statement, 133 | $.global_statement, 134 | $.nonlocal_statement, 135 | $.exec_statement, 136 | $.type_alias_statement, 137 | ), 138 | 139 | import_statement: $ => seq( 140 | 'import', 141 | $._import_list, 142 | ), 143 | 144 | import_prefix: _ => repeat1('.'), 145 | 146 | relative_import: $ => seq( 147 | $.import_prefix, 148 | optional($.dotted_name), 149 | ), 150 | 151 | future_import_statement: $ => seq( 152 | 'from', 153 | '__future__', 154 | 'import', 155 | choice( 156 | $._import_list, 157 | seq('(', 
$._import_list, ')'), 158 | ), 159 | ), 160 | 161 | import_from_statement: $ => seq( 162 | 'from', 163 | field('module_name', choice( 164 | $.relative_import, 165 | $.dotted_name, 166 | )), 167 | 'import', 168 | choice( 169 | $.wildcard_import, 170 | $._import_list, 171 | seq('(', $._import_list, ')'), 172 | ), 173 | ), 174 | 175 | _import_list: $ => seq( 176 | commaSep1(field('name', choice( 177 | $.dotted_name, 178 | $.aliased_import, 179 | ))), 180 | optional(','), 181 | ), 182 | 183 | aliased_import: $ => seq( 184 | field('name', $.dotted_name), 185 | 'as', 186 | field('alias', $.identifier), 187 | ), 188 | 189 | wildcard_import: _ => '*', 190 | 191 | print_statement: $ => choice( 192 | prec(1, seq( 193 | 'print', 194 | $.chevron, 195 | repeat(seq(',', field('argument', $.expression))), 196 | optional(',')), 197 | ), 198 | prec(-3, prec.dynamic(-1, seq( 199 | 'print', 200 | commaSep1(field('argument', $.expression)), 201 | optional(','), 202 | ))), 203 | ), 204 | 205 | chevron: $ => seq( 206 | '>>', 207 | $.expression, 208 | ), 209 | 210 | assert_statement: $ => seq( 211 | 'assert', 212 | commaSep1($.expression), 213 | ), 214 | 215 | expression_statement: $ => choice( 216 | $.expression, 217 | seq(commaSep1($.expression), optional(',')), 218 | $.assignment, 219 | $.augmented_assignment, 220 | $.yield, 221 | ), 222 | 223 | named_expression: $ => seq( 224 | field('name', $._named_expression_lhs), 225 | ':=', 226 | field('value', $.expression), 227 | ), 228 | 229 | _named_expression_lhs: $ => choice( 230 | $.identifier, 231 | $.keyword_identifier, 232 | ), 233 | 234 | return_statement: $ => seq( 235 | 'return', 236 | optional($._expressions), 237 | ), 238 | 239 | delete_statement: $ => seq( 240 | 'del', 241 | $._expressions, 242 | ), 243 | 244 | _expressions: $ => choice( 245 | $.expression, 246 | $.expression_list, 247 | ), 248 | 249 | raise_statement: $ => seq( 250 | 'raise', 251 | optional($._expressions), 252 | optional(seq('from', field('cause', 
$.expression))), 253 | ), 254 | 255 | pass_statement: _ => prec.left('pass'), 256 | break_statement: _ => prec.left('break'), 257 | continue_statement: _ => prec.left('continue'), 258 | 259 | // Compound statements 260 | 261 | _compound_statement: $ => choice( 262 | $.if_statement, 263 | $.for_statement, 264 | $.while_statement, 265 | $.try_statement, 266 | $.with_statement, 267 | $.function_definition, 268 | $.class_definition, 269 | $.decorated_definition, 270 | $.match_statement, 271 | ), 272 | 273 | if_statement: $ => seq( 274 | 'if', 275 | field('condition', $.expression), 276 | ':', 277 | field('consequence', $._suite), 278 | repeat(field('alternative', $.elif_clause)), 279 | optional(field('alternative', $.else_clause)), 280 | ), 281 | 282 | elif_clause: $ => seq( 283 | 'elif', 284 | field('condition', $.expression), 285 | ':', 286 | field('consequence', $._suite), 287 | ), 288 | 289 | else_clause: $ => seq( 290 | 'else', 291 | ':', 292 | field('body', $._suite), 293 | ), 294 | 295 | match_statement: $ => seq( 296 | 'match', 297 | commaSep1(field('subject', $.expression)), 298 | optional(','), 299 | ':', 300 | field('body', alias($._match_block, $.block)), 301 | ), 302 | 303 | _match_block: $ => choice( 304 | seq( 305 | $._indent, 306 | repeat(field('alternative', $.case_clause)), 307 | $._dedent, 308 | ), 309 | $._newline, 310 | ), 311 | 312 | case_clause: $ => seq( 313 | 'case', 314 | commaSep1($.case_pattern), 315 | optional(','), 316 | optional(field('guard', $.if_clause)), 317 | ':', 318 | field('consequence', $._suite), 319 | ), 320 | 321 | for_statement: $ => seq( 322 | optional('async'), 323 | 'for', 324 | field('left', $._left_hand_side), 325 | 'in', 326 | field('right', $._expressions), 327 | ':', 328 | field('body', $._suite), 329 | field('alternative', optional($.else_clause)), 330 | ), 331 | 332 | while_statement: $ => seq( 333 | 'while', 334 | field('condition', $.expression), 335 | ':', 336 | field('body', $._suite), 337 | 
optional(field('alternative', $.else_clause)), 338 | ), 339 | 340 | try_statement: $ => seq( 341 | 'try', 342 | ':', 343 | field('body', $._suite), 344 | choice( 345 | seq( 346 | repeat1($.except_clause), 347 | optional($.else_clause), 348 | optional($.finally_clause), 349 | ), 350 | seq( 351 | repeat1($.except_group_clause), 352 | optional($.else_clause), 353 | optional($.finally_clause), 354 | ), 355 | $.finally_clause, 356 | ), 357 | ), 358 | 359 | except_clause: $ => seq( 360 | 'except', 361 | optional(seq( 362 | field('value', $.expression), 363 | optional(seq( 364 | choice('as', ','), 365 | field('alias', $.expression), 366 | )), 367 | )), 368 | ':', 369 | $._suite, 370 | ), 371 | 372 | except_group_clause: $ => seq( 373 | 'except*', 374 | seq( 375 | $.expression, 376 | optional(seq( 377 | 'as', 378 | $.expression, 379 | )), 380 | ), 381 | ':', 382 | $._suite, 383 | ), 384 | 385 | finally_clause: $ => seq( 386 | 'finally', 387 | ':', 388 | $._suite, 389 | ), 390 | 391 | with_statement: $ => seq( 392 | optional('async'), 393 | 'with', 394 | $.with_clause, 395 | ':', 396 | field('body', $._suite), 397 | ), 398 | 399 | with_clause: $ => choice( 400 | seq(commaSep1($.with_item), optional(',')), 401 | seq('(', commaSep1($.with_item), optional(','), ')'), 402 | ), 403 | 404 | with_item: $ => prec.dynamic(1, seq( 405 | field('value', $.expression), 406 | )), 407 | 408 | function_definition: $ => seq( 409 | optional('async'), 410 | 'def', 411 | field('name', $.identifier), 412 | field('type_parameters', optional($.type_parameter)), 413 | field('parameters', $.parameters), 414 | optional( 415 | seq( 416 | '->', 417 | field('return_type', $.type), 418 | ), 419 | ), 420 | ':', 421 | field('body', $._suite), 422 | ), 423 | 424 | parameters: $ => seq( 425 | '(', 426 | optional($._parameters), 427 | ')', 428 | ), 429 | 430 | lambda_parameters: $ => $._parameters, 431 | 432 | list_splat: $ => seq( 433 | '*', 434 | $.expression, 435 | ), 436 | 437 | dictionary_splat: $ => 
seq( 438 | '**', 439 | $.expression, 440 | ), 441 | 442 | global_statement: $ => seq( 443 | 'global', 444 | commaSep1($.identifier), 445 | ), 446 | 447 | nonlocal_statement: $ => seq( 448 | 'nonlocal', 449 | commaSep1($.identifier), 450 | ), 451 | 452 | exec_statement: $ => seq( 453 | 'exec', 454 | field('code', choice($.string, $.identifier)), 455 | optional( 456 | seq( 457 | 'in', 458 | commaSep1($.expression), 459 | ), 460 | ), 461 | ), 462 | 463 | type_alias_statement: $ => prec.dynamic(1, seq( 464 | 'type', 465 | field('left', $.type), 466 | '=', 467 | field('right', $.type), 468 | )), 469 | 470 | class_definition: $ => seq( 471 | 'class', 472 | field('name', $.identifier), 473 | field('type_parameters', optional($.type_parameter)), 474 | field('superclasses', optional($.argument_list)), 475 | ':', 476 | field('body', $._suite), 477 | ), 478 | type_parameter: $ => seq( 479 | '[', 480 | commaSep1($.type), 481 | optional(','), 482 | ']', 483 | ), 484 | 485 | parenthesized_list_splat: $ => prec(PREC.parenthesized_list_splat, seq( 486 | '(', 487 | choice( 488 | alias($.parenthesized_list_splat, $.parenthesized_expression), 489 | $.list_splat, 490 | ), 491 | ')', 492 | )), 493 | 494 | argument_list: $ => seq( 495 | '(', 496 | optional(commaSep1( 497 | choice( 498 | $.expression, 499 | $.list_splat, 500 | $.dictionary_splat, 501 | alias($.parenthesized_list_splat, $.parenthesized_expression), 502 | $.keyword_argument, 503 | ), 504 | )), 505 | optional(','), 506 | ')', 507 | ), 508 | 509 | decorated_definition: $ => seq( 510 | repeat1($.decorator), 511 | field('definition', choice( 512 | $.class_definition, 513 | $.function_definition, 514 | )), 515 | ), 516 | 517 | decorator: $ => seq( 518 | '@', 519 | $.expression, 520 | $._newline, 521 | ), 522 | 523 | _suite: $ => choice( 524 | alias($._simple_statements, $.block), 525 | seq($._indent, $.block), 526 | alias($._newline, $.block), 527 | ), 528 | 529 | block: $ => seq( 530 | repeat($._statement), 531 | $._dedent, 
532 | ), 533 | 534 | expression_list: $ => prec.right(seq( 535 | $.expression, 536 | choice( 537 | ',', 538 | seq( 539 | repeat1(seq( 540 | ',', 541 | $.expression, 542 | )), 543 | optional(','), 544 | ), 545 | ), 546 | )), 547 | 548 | dotted_name: $ => prec(1, sep1($.identifier, '.')), 549 | 550 | // Match cases 551 | 552 | case_pattern: $ => prec(1, choice( 553 | alias($._as_pattern, $.as_pattern), 554 | $.keyword_pattern, 555 | $._simple_pattern, 556 | )), 557 | 558 | _simple_pattern: $ => prec(1, choice( 559 | $.class_pattern, 560 | $.splat_pattern, 561 | $.union_pattern, 562 | alias($._list_pattern, $.list_pattern), 563 | alias($._tuple_pattern, $.tuple_pattern), 564 | $.dict_pattern, 565 | $.string, 566 | $.concatenated_string, 567 | $.true, 568 | $.false, 569 | $.none, 570 | seq(optional('-'), choice($.integer, $.float)), 571 | $.complex_pattern, 572 | $.dotted_name, 573 | '_', 574 | )), 575 | 576 | _as_pattern: $ => seq($.case_pattern, 'as', $.identifier), 577 | 578 | union_pattern: $ => prec.right(seq($._simple_pattern, repeat1(prec.left(seq('|', $._simple_pattern))))), 579 | 580 | _list_pattern: $ => seq( 581 | '[', 582 | optional(seq( 583 | commaSep1($.case_pattern), 584 | optional(','), 585 | )), 586 | ']', 587 | ), 588 | 589 | _tuple_pattern: $ => seq( 590 | '(', 591 | optional(seq( 592 | commaSep1($.case_pattern), 593 | optional(','), 594 | )), 595 | ')', 596 | ), 597 | 598 | dict_pattern: $ => seq( 599 | '{', 600 | optional(seq( 601 | commaSep1(choice($._key_value_pattern, $.splat_pattern)), 602 | optional(','), 603 | )), 604 | '}', 605 | ), 606 | 607 | _key_value_pattern: $ => seq( 608 | field('key', $._simple_pattern), 609 | ':', 610 | field('value', $.case_pattern), 611 | ), 612 | 613 | keyword_pattern: $ => seq($.identifier, '=', $._simple_pattern), 614 | 615 | splat_pattern: $ => prec(1, seq(choice('*', '**'), choice($.identifier, '_'))), 616 | 617 | class_pattern: $ => seq( 618 | $.dotted_name, 619 | '(', 620 | optional(seq( 621 | 
commaSep1($.case_pattern), 622 | optional(','), 623 | )), 624 | ')', 625 | ), 626 | 627 | complex_pattern: $ => prec(1, seq( 628 | optional('-'), 629 | choice($.integer, $.float), 630 | choice('+', '-'), 631 | choice($.integer, $.float), 632 | )), 633 | 634 | // Patterns 635 | 636 | _parameters: $ => seq( 637 | commaSep1($.parameter), 638 | optional(','), 639 | ), 640 | 641 | _patterns: $ => seq( 642 | commaSep1($.pattern), 643 | optional(','), 644 | ), 645 | 646 | parameter: $ => choice( 647 | $.identifier, 648 | $.typed_parameter, 649 | $.default_parameter, 650 | $.typed_default_parameter, 651 | $.list_splat_pattern, 652 | $.tuple_pattern, 653 | $.keyword_separator, 654 | $.positional_separator, 655 | $.dictionary_splat_pattern, 656 | ), 657 | 658 | pattern: $ => choice( 659 | $.identifier, 660 | $.keyword_identifier, 661 | $.subscript, 662 | $.attribute, 663 | $.list_splat_pattern, 664 | $.tuple_pattern, 665 | $.list_pattern, 666 | ), 667 | 668 | tuple_pattern: $ => seq( 669 | '(', 670 | optional($._patterns), 671 | ')', 672 | ), 673 | 674 | list_pattern: $ => seq( 675 | '[', 676 | optional($._patterns), 677 | ']', 678 | ), 679 | 680 | default_parameter: $ => seq( 681 | field('name', choice($.identifier, $.tuple_pattern)), 682 | '=', 683 | field('value', $.expression), 684 | ), 685 | 686 | typed_default_parameter: $ => prec(PREC.typed_parameter, seq( 687 | field('name', $.identifier), 688 | ':', 689 | field('type', $.type), 690 | '=', 691 | field('value', $.expression), 692 | )), 693 | 694 | list_splat_pattern: $ => seq( 695 | '*', 696 | choice($.identifier, $.keyword_identifier, $.subscript, $.attribute), 697 | ), 698 | 699 | dictionary_splat_pattern: $ => seq( 700 | '**', 701 | choice($.identifier, $.keyword_identifier, $.subscript, $.attribute), 702 | ), 703 | 704 | // Extended patterns (patterns allowed in match statement are far more flexible than simple patterns though still a subset of "expression") 705 | 706 | as_pattern: $ => prec.left(seq( 707 | 
$.expression, 708 | 'as', 709 | field('alias', alias($.expression, $.as_pattern_target)), 710 | )), 711 | 712 | // Expressions 713 | 714 | _expression_within_for_in_clause: $ => choice( 715 | $.expression, 716 | alias($.lambda_within_for_in_clause, $.lambda), 717 | ), 718 | 719 | expression: $ => choice( 720 | $.comparison_operator, 721 | $.not_operator, 722 | $.boolean_operator, 723 | $.lambda, 724 | $.primary_expression, 725 | $.conditional_expression, 726 | $.named_expression, 727 | $.as_pattern, 728 | ), 729 | 730 | primary_expression: $ => choice( 731 | $.await, 732 | $.binary_operator, 733 | $.identifier, 734 | $.keyword_identifier, 735 | $.string, 736 | $.concatenated_string, 737 | $.integer, 738 | $.float, 739 | $.true, 740 | $.false, 741 | $.none, 742 | $.unary_operator, 743 | $.attribute, 744 | $.subscript, 745 | $.call, 746 | $.list, 747 | $.list_comprehension, 748 | $.dictionary, 749 | $.dictionary_comprehension, 750 | $.set, 751 | $.set_comprehension, 752 | $.tuple, 753 | $.parenthesized_expression, 754 | $.generator_expression, 755 | $.ellipsis, 756 | alias($.list_splat_pattern, $.list_splat), 757 | ), 758 | 759 | not_operator: $ => prec(PREC.not, seq( 760 | 'not', 761 | field('argument', $.expression), 762 | )), 763 | 764 | boolean_operator: $ => choice( 765 | prec.left(PREC.and, seq( 766 | field('left', $.expression), 767 | field('operator', 'and'), 768 | field('right', $.expression), 769 | )), 770 | prec.left(PREC.or, seq( 771 | field('left', $.expression), 772 | field('operator', 'or'), 773 | field('right', $.expression), 774 | )), 775 | ), 776 | 777 | binary_operator: $ => { 778 | const table = [ 779 | [prec.left, '+', PREC.plus], 780 | [prec.left, '-', PREC.plus], 781 | [prec.left, '*', PREC.times], 782 | [prec.left, '@', PREC.times], 783 | [prec.left, '/', PREC.times], 784 | [prec.left, '%', PREC.times], 785 | [prec.left, '//', PREC.times], 786 | [prec.right, '**', PREC.power], 787 | [prec.left, '|', PREC.bitwise_or], 788 | [prec.left, '&', 
PREC.bitwise_and], 789 | [prec.left, '^', PREC.xor], 790 | [prec.left, '<<', PREC.shift], 791 | [prec.left, '>>', PREC.shift], 792 | ]; 793 | 794 | // @ts-ignore 795 | return choice(...table.map(([fn, operator, precedence]) => fn(precedence, seq( 796 | field('left', $.primary_expression), 797 | // @ts-ignore 798 | field('operator', operator), 799 | field('right', $.primary_expression), 800 | )))); 801 | }, 802 | 803 | unary_operator: $ => prec(PREC.unary, seq( 804 | field('operator', choice('+', '-', '~')), 805 | field('argument', $.primary_expression), 806 | )), 807 | 808 | _not_in: _ => seq('not', 'in'), 809 | 810 | _is_not: _ => seq('is', 'not'), 811 | 812 | comparison_operator: $ => prec.left(PREC.compare, seq( 813 | $.primary_expression, 814 | repeat1(seq( 815 | field('operators', 816 | choice( 817 | '<', 818 | '<=', 819 | '==', 820 | '!=', 821 | '>=', 822 | '>', 823 | '<>', 824 | 'in', 825 | alias($._not_in, 'not in'), 826 | 'is', 827 | alias($._is_not, 'is not'), 828 | )), 829 | $.primary_expression, 830 | )), 831 | )), 832 | 833 | lambda: $ => prec(PREC.lambda, seq( 834 | 'lambda', 835 | field('parameters', optional($.lambda_parameters)), 836 | ':', 837 | field('body', $.expression), 838 | )), 839 | 840 | lambda_within_for_in_clause: $ => seq( 841 | 'lambda', 842 | field('parameters', optional($.lambda_parameters)), 843 | ':', 844 | field('body', $._expression_within_for_in_clause), 845 | ), 846 | 847 | assignment: $ => seq( 848 | field('left', $._left_hand_side), 849 | choice( 850 | seq('=', field('right', $._right_hand_side)), 851 | seq(':', field('type', $.type)), 852 | seq(':', field('type', $.type), '=', field('right', $._right_hand_side)), 853 | ), 854 | ), 855 | 856 | augmented_assignment: $ => seq( 857 | field('left', $._left_hand_side), 858 | field('operator', choice( 859 | '+=', '-=', '*=', '/=', '@=', '//=', '%=', '**=', 860 | '>>=', '<<=', '&=', '^=', '|=', 861 | )), 862 | field('right', $._right_hand_side), 863 | ), 864 | 865 | _left_hand_side: 
$ => choice( 866 | $.pattern, 867 | $.pattern_list, 868 | ), 869 | 870 | pattern_list: $ => seq( 871 | $.pattern, 872 | choice( 873 | ',', 874 | seq( 875 | repeat1(seq( 876 | ',', 877 | $.pattern, 878 | )), 879 | optional(','), 880 | ), 881 | ), 882 | ), 883 | 884 | _right_hand_side: $ => choice( 885 | $.expression, 886 | $.expression_list, 887 | $.assignment, 888 | $.augmented_assignment, 889 | $.pattern_list, 890 | $.yield, 891 | ), 892 | 893 | yield: $ => prec.right(seq( 894 | 'yield', 895 | choice( 896 | seq( 897 | 'from', 898 | $.expression, 899 | ), 900 | optional($._expressions), 901 | ), 902 | )), 903 | 904 | attribute: $ => prec(PREC.call, seq( 905 | field('object', $.primary_expression), 906 | '.', 907 | field('attribute', $.identifier), 908 | )), 909 | 910 | subscript: $ => prec(PREC.call, seq( 911 | field('value', $.primary_expression), 912 | '[', 913 | commaSep1(field('subscript', choice($.expression, $.slice))), 914 | optional(','), 915 | ']', 916 | )), 917 | 918 | slice: $ => seq( 919 | optional($.expression), 920 | ':', 921 | optional($.expression), 922 | optional(seq(':', optional($.expression))), 923 | ), 924 | 925 | ellipsis: _ => '...', 926 | 927 | call: $ => prec(PREC.call, seq( 928 | field('function', $.primary_expression), 929 | field('arguments', choice( 930 | $.generator_expression, 931 | $.argument_list, 932 | )), 933 | )), 934 | 935 | typed_parameter: $ => prec(PREC.typed_parameter, seq( 936 | choice( 937 | $.identifier, 938 | $.list_splat_pattern, 939 | $.dictionary_splat_pattern, 940 | ), 941 | ':', 942 | field('type', $.type), 943 | )), 944 | 945 | type: $ => choice( 946 | prec(1, $.expression), 947 | $.splat_type, 948 | $.generic_type, 949 | $.union_type, 950 | $.constrained_type, 951 | $.member_type, 952 | ), 953 | splat_type: $ => prec(1, seq(choice('*', '**'), $.identifier)), 954 | generic_type: $ => prec(1, seq( 955 | choice( 956 | $.identifier, 957 | alias('type', $.identifier), 958 | ), 959 | $.type_parameter, 960 | )), 961 | 
union_type: $ => prec.left(seq($.type, '|', $.type)), 962 | constrained_type: $ => prec.right(seq($.type, ':', $.type)), 963 | member_type: $ => seq($.type, '.', $.identifier), 964 | 965 | keyword_argument: $ => seq( 966 | field('name', choice($.identifier, $.keyword_identifier)), 967 | '=', 968 | field('value', $.expression), 969 | ), 970 | 971 | // Literals 972 | 973 | list: $ => seq( 974 | '[', 975 | optional($._collection_elements), 976 | ']', 977 | ), 978 | 979 | set: $ => seq( 980 | '{', 981 | $._collection_elements, 982 | '}', 983 | ), 984 | 985 | tuple: $ => seq( 986 | '(', 987 | optional($._collection_elements), 988 | ')', 989 | ), 990 | 991 | dictionary: $ => seq( 992 | '{', 993 | optional(commaSep1(choice($.pair, $.dictionary_splat))), 994 | optional(','), 995 | '}', 996 | ), 997 | 998 | pair: $ => seq( 999 | field('key', $.expression), 1000 | ':', 1001 | field('value', $.expression), 1002 | ), 1003 | 1004 | list_comprehension: $ => seq( 1005 | '[', 1006 | field('body', $.expression), 1007 | $._comprehension_clauses, 1008 | ']', 1009 | ), 1010 | 1011 | dictionary_comprehension: $ => seq( 1012 | '{', 1013 | field('body', $.pair), 1014 | $._comprehension_clauses, 1015 | '}', 1016 | ), 1017 | 1018 | set_comprehension: $ => seq( 1019 | '{', 1020 | field('body', $.expression), 1021 | $._comprehension_clauses, 1022 | '}', 1023 | ), 1024 | 1025 | generator_expression: $ => seq( 1026 | '(', 1027 | field('body', $.expression), 1028 | $._comprehension_clauses, 1029 | ')', 1030 | ), 1031 | 1032 | _comprehension_clauses: $ => seq( 1033 | $.for_in_clause, 1034 | repeat(choice( 1035 | $.for_in_clause, 1036 | $.if_clause, 1037 | )), 1038 | ), 1039 | 1040 | parenthesized_expression: $ => prec(PREC.parenthesized_expression, seq( 1041 | '(', 1042 | choice($.expression, $.yield), 1043 | ')', 1044 | )), 1045 | 1046 | _collection_elements: $ => seq( 1047 | commaSep1(choice( 1048 | $.expression, $.yield, $.list_splat, $.parenthesized_list_splat, 1049 | )), 1050 | 
optional(','), 1051 | ), 1052 | 1053 | for_in_clause: $ => prec.left(seq( 1054 | optional('async'), 1055 | 'for', 1056 | field('left', $._left_hand_side), 1057 | 'in', 1058 | field('right', commaSep1($._expression_within_for_in_clause)), 1059 | optional(','), 1060 | )), 1061 | 1062 | if_clause: $ => seq( 1063 | 'if', 1064 | $.expression, 1065 | ), 1066 | 1067 | conditional_expression: $ => prec.right(PREC.conditional, seq( 1068 | $.expression, 1069 | 'if', 1070 | $.expression, 1071 | 'else', 1072 | $.expression, 1073 | )), 1074 | 1075 | concatenated_string: $ => seq( 1076 | $.string, 1077 | repeat1($.string), 1078 | ), 1079 | 1080 | string: $ => seq( 1081 | $.string_start, 1082 | repeat(choice($.interpolation, $.string_content)), 1083 | $.string_end, 1084 | ), 1085 | 1086 | string_content: $ => prec.right(repeat1( 1087 | choice( 1088 | $.escape_interpolation, 1089 | $.escape_sequence, 1090 | $._not_escape_sequence, 1091 | $._string_content, 1092 | ))), 1093 | 1094 | interpolation: $ => seq( 1095 | '{', 1096 | field('expression', $._f_expression), 1097 | optional('='), 1098 | optional(field('type_conversion', $.type_conversion)), 1099 | optional(field('format_specifier', $.format_specifier)), 1100 | '}', 1101 | ), 1102 | 1103 | _f_expression: $ => choice( 1104 | $.expression, 1105 | $.expression_list, 1106 | $.pattern_list, 1107 | $.yield, 1108 | ), 1109 | 1110 | escape_sequence: _ => token.immediate(prec(1, seq( 1111 | '\\', 1112 | choice( 1113 | /u[a-fA-F\d]{4}/, 1114 | /U[a-fA-F\d]{8}/, 1115 | /x[a-fA-F\d]{2}/, 1116 | /\d{1,3}/, 1117 | /\r?\n/, 1118 | /['"abfrntv\\]/, 1119 | /N\{[^}]+\}/, 1120 | ), 1121 | ))), 1122 | 1123 | _not_escape_sequence: _ => token.immediate('\\'), 1124 | 1125 | format_specifier: $ => seq( 1126 | ':', 1127 | repeat(choice( 1128 | token(prec(1, /[^{}\n]+/)), 1129 | alias($.interpolation, $.format_expression), 1130 | )), 1131 | ), 1132 | 1133 | type_conversion: _ => /![a-z]/, 1134 | 1135 | integer: _ => token(choice( 1136 | seq( 1137 | 
choice('0x', '0X'), 1138 | repeat1(/_?[A-Fa-f0-9]+/), 1139 | optional(/[Ll]/), 1140 | ), 1141 | seq( 1142 | choice('0o', '0O'), 1143 | repeat1(/_?[0-7]+/), 1144 | optional(/[Ll]/), 1145 | ), 1146 | seq( 1147 | choice('0b', '0B'), 1148 | repeat1(/_?[0-1]+/), 1149 | optional(/[Ll]/), 1150 | ), 1151 | seq( 1152 | repeat1(/[0-9]+_?/), 1153 | choice( 1154 | optional(/[Ll]/), // long numbers 1155 | optional(/[jJ]/), // complex numbers 1156 | ), 1157 | ), 1158 | )), 1159 | 1160 | float: _ => { 1161 | const digits = repeat1(/[0-9]+_?/); 1162 | const exponent = seq(/[eE][\+-]?/, digits); 1163 | 1164 | return token(seq( 1165 | choice( 1166 | seq(digits, '.', optional(digits), optional(exponent)), 1167 | seq(optional(digits), '.', digits, optional(exponent)), 1168 | seq(digits, exponent), 1169 | ), 1170 | optional(/[jJ]/), 1171 | )); 1172 | }, 1173 | 1174 | identifier: _ => /[_\p{XID_Start}][_\p{XID_Continue}]*/, 1175 | 1176 | keyword_identifier: $ => choice( 1177 | prec(-3, alias( 1178 | choice( 1179 | 'print', 1180 | 'exec', 1181 | 'async', 1182 | 'await', 1183 | ), 1184 | $.identifier, 1185 | )), 1186 | alias( 1187 | choice('type', 'match'), 1188 | $.identifier, 1189 | ), 1190 | ), 1191 | 1192 | true: _ => 'True', 1193 | false: _ => 'False', 1194 | none: _ => 'None', 1195 | 1196 | await: $ => prec(PREC.unary, seq( 1197 | 'await', 1198 | $.primary_expression, 1199 | )), 1200 | 1201 | comment: _ => token(seq('#', /.*/)), 1202 | 1203 | line_continuation: _ => token(seq('\\', choice(seq(optional('\r'), '\n'), '\0'))), 1204 | 1205 | positional_separator: _ => '/', 1206 | keyword_separator: _ => '*', 1207 | }, 1208 | }); 1209 | 1210 | module.exports.PREC = PREC; 1211 | 1212 | /** 1213 | * Creates a rule to match one or more of the rules separated by a comma 1214 | * 1215 | * @param {RuleOrLiteral} rule 1216 | * 1217 | * @returns {SeqRule} 1218 | */ 1219 | function commaSep1(rule) { 1220 | return sep1(rule, ','); 1221 | } 1222 | 1223 | /** 1224 | * Creates a rule to match one or 
more occurrences of `rule` separated by `sep` 1225 | * 1226 | * @param {RuleOrLiteral} rule 1227 | * 1228 | * @param {RuleOrLiteral} separator 1229 | * 1230 | * @returns {SeqRule} 1231 | */ 1232 | function sep1(rule, separator) { 1233 | return seq(rule, repeat(seq(separator, rule))); 1234 | } 1235 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tree-sitter-python", 3 | "version": "0.23.6", 4 | "description": "Python grammar for tree-sitter", 5 | "repository": "https://github.com/tree-sitter/tree-sitter-python", 6 | "license": "MIT", 7 | "author": { 8 | "name": "Max Brunsfeld", 9 | "email": "maxbrunsfeld@gmail.com" 10 | }, 11 | "contributors": [ 12 | { 13 | "name": "Amaan Qureshi", 14 | "email": "amaanq12@gmail.com" 15 | } 16 | ], 17 | "main": "bindings/node", 18 | "types": "bindings/node", 19 | "keywords": [ 20 | "incremental", 21 | "parsing", 22 | "tree-sitter", 23 | "python" 24 | ], 25 | "files": [ 26 | "grammar.js", 27 | "tree-sitter.json", 28 | "binding.gyp", 29 | "prebuilds/**", 30 | "bindings/node/*", 31 | "queries/*", 32 | "src/**", 33 | "*.wasm" 34 | ], 35 | "dependencies": { 36 | "node-addon-api": "^8.3.0", 37 | "node-gyp-build": "^4.8.4" 38 | }, 39 | "devDependencies": { 40 | "eslint": "^9.17.0", 41 | "eslint-config-treesitter": "^1.0.2", 42 | "prebuildify": "^6.0.1", 43 | "tree-sitter-cli": "^0.24.5" 44 | }, 45 | "peerDependencies": { 46 | "tree-sitter": "^0.22.1" 47 | }, 48 | "peerDependenciesMeta": { 49 | "tree-sitter": { 50 | "optional": true 51 | } 52 | }, 53 | "scripts": { 54 | "install": "node-gyp-build", 55 | "lint": "eslint grammar.js", 56 | "prestart": "tree-sitter build --wasm", 57 | "start": "tree-sitter playground", 58 | "test": "node --test bindings/node/*_test.js" 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "tree-sitter-python" 7 | description = "Python grammar for tree-sitter" 8 | version = "0.23.6" 9 | keywords = ["incremental", "parsing", "tree-sitter", "python"] 10 | classifiers = [ 11 | "Intended Audience :: Developers", 12 | "License :: OSI Approved :: MIT License", 13 | "Topic :: Software Development :: Compilers", 14 | "Topic :: Text Processing :: Linguistic", 15 | "Typing :: Typed", 16 | ] 17 | authors = [ 18 | { name = "Max Brunsfeld", email = "maxbrunsfeld@gmail.com" }, 19 | { name = "Amaan Qureshi", email = "amaanq12@gmail.com" }, 20 | ] 21 | requires-python = ">=3.9" 22 | license.text = "MIT" 23 | readme = "README.md" 24 | 25 | [project.urls] 26 | Homepage = "https://github.com/tree-sitter/tree-sitter-python" 27 | 28 | [project.optional-dependencies] 29 | core = ["tree-sitter~=0.22"] 30 | 31 | [tool.cibuildwheel] 32 | build = "cp39-*" 33 | build-frontend = "build" 34 | -------------------------------------------------------------------------------- /queries/highlights.scm: -------------------------------------------------------------------------------- 1 | ; Identifier naming conventions 2 | 3 | (identifier) @variable 4 | 5 | ((identifier) @constructor 6 | (#match? @constructor "^[A-Z]")) 7 | 8 | ((identifier) @constant 9 | (#match? @constant "^[A-Z][A-Z_]*$")) 10 | 11 | ; Function calls 12 | 13 | (decorator) @function 14 | (decorator 15 | (identifier) @function) 16 | 17 | (call 18 | function: (attribute attribute: (identifier) @function.method)) 19 | (call 20 | function: (identifier) @function) 21 | 22 | ; Builtin functions 23 | 24 | ((call 25 | function: (identifier) @function.builtin) 26 | (#match? 
27 | @function.builtin 28 | "^(abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|delattr|dict|dir|divmod|enumerate|eval|exec|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip|__import__)$")) 29 | 30 | ; Function definitions 31 | 32 | (function_definition 33 | name: (identifier) @function) 34 | 35 | (attribute attribute: (identifier) @property) 36 | (type (identifier) @type) 37 | 38 | ; Literals 39 | 40 | [ 41 | (none) 42 | (true) 43 | (false) 44 | ] @constant.builtin 45 | 46 | [ 47 | (integer) 48 | (float) 49 | ] @number 50 | 51 | (comment) @comment 52 | (string) @string 53 | (escape_sequence) @escape 54 | 55 | (interpolation 56 | "{" @punctuation.special 57 | "}" @punctuation.special) @embedded 58 | 59 | [ 60 | "-" 61 | "-=" 62 | "!=" 63 | "*" 64 | "**" 65 | "**=" 66 | "*=" 67 | "/" 68 | "//" 69 | "//=" 70 | "/=" 71 | "&" 72 | "&=" 73 | "%" 74 | "%=" 75 | "^" 76 | "^=" 77 | "+" 78 | "->" 79 | "+=" 80 | "<" 81 | "<<" 82 | "<<=" 83 | "<=" 84 | "<>" 85 | "=" 86 | ":=" 87 | "==" 88 | ">" 89 | ">=" 90 | ">>" 91 | ">>=" 92 | "|" 93 | "|=" 94 | "~" 95 | "@=" 96 | "and" 97 | "in" 98 | "is" 99 | "not" 100 | "or" 101 | "is not" 102 | "not in" 103 | ] @operator 104 | 105 | [ 106 | "as" 107 | "assert" 108 | "async" 109 | "await" 110 | "break" 111 | "class" 112 | "continue" 113 | "def" 114 | "del" 115 | "elif" 116 | "else" 117 | "except" 118 | "exec" 119 | "finally" 120 | "for" 121 | "from" 122 | "global" 123 | "if" 124 | "import" 125 | "lambda" 126 | "nonlocal" 127 | "pass" 128 | "print" 129 | "raise" 130 | "return" 131 | "try" 132 | "while" 133 | "with" 134 | "yield" 135 | "match" 136 | "case" 137 | ] @keyword 138 | -------------------------------------------------------------------------------- 
/queries/tags.scm: -------------------------------------------------------------------------------- 1 | (module (expression_statement (assignment left: (identifier) @name) @definition.constant)) 2 | 3 | (class_definition 4 | name: (identifier) @name) @definition.class 5 | 6 | (function_definition 7 | name: (identifier) @name) @definition.function 8 | 9 | (call 10 | function: [ 11 | (identifier) @name 12 | (attribute 13 | attribute: (identifier) @name) 14 | ]) @reference.call 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os.path import isdir, join 2 | from platform import system 3 | 4 | from setuptools import Extension, find_packages, setup 5 | from setuptools.command.build import build 6 | from wheel.bdist_wheel import bdist_wheel 7 | 8 | 9 | class Build(build): 10 | def run(self): 11 | if isdir("queries"): 12 | dest = join(self.build_lib, "tree_sitter_python", "queries") 13 | self.copy_tree("queries", dest) 14 | super().run() 15 | 16 | 17 | class BdistWheel(bdist_wheel): 18 | def get_tag(self): 19 | python, abi, platform = super().get_tag() 20 | if python.startswith("cp"): 21 | python, abi = "cp39", "abi3" 22 | return python, abi, platform 23 | 24 | 25 | setup( 26 | packages=find_packages("bindings/python"), 27 | package_dir={"": "bindings/python"}, 28 | package_data={ 29 | "tree_sitter_python": ["*.pyi", "py.typed"], 30 | "tree_sitter_python.queries": ["*.scm"], 31 | }, 32 | ext_package="tree_sitter_python", 33 | ext_modules=[ 34 | Extension( 35 | name="_binding", 36 | sources=[ 37 | "bindings/python/tree_sitter_python/binding.c", 38 | "src/parser.c", 39 | "src/scanner.c", 40 | ], 41 | extra_compile_args=[ 42 | "-std=c11", 43 | "-fvisibility=hidden", 44 | ] if system() != "Windows" else [ 45 | "/std:c11", 46 | "/utf-8", 47 | ], 48 | define_macros=[ 49 | ("Py_LIMITED_API", "0x03090000"), 50 | ("PY_SSIZE_T_CLEAN", None), 51 | 
("TREE_SITTER_HIDE_SYMBOLS", None), 52 | ], 53 | include_dirs=["src"], 54 | py_limited_api=True, 55 | ) 56 | ], 57 | cmdclass={ 58 | "build": Build, 59 | "bdist_wheel": BdistWheel 60 | }, 61 | zip_safe=False 62 | ) 63 | -------------------------------------------------------------------------------- /src/scanner.c: -------------------------------------------------------------------------------- 1 | #include "tree_sitter/array.h" 2 | #include "tree_sitter/parser.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum TokenType { 10 | NEWLINE, 11 | INDENT, 12 | DEDENT, 13 | STRING_START, 14 | STRING_CONTENT, 15 | ESCAPE_INTERPOLATION, 16 | STRING_END, 17 | COMMENT, 18 | CLOSE_PAREN, 19 | CLOSE_BRACKET, 20 | CLOSE_BRACE, 21 | EXCEPT, 22 | }; 23 | 24 | typedef enum { 25 | SingleQuote = 1 << 0, 26 | DoubleQuote = 1 << 1, 27 | BackQuote = 1 << 2, 28 | Raw = 1 << 3, 29 | Format = 1 << 4, 30 | Triple = 1 << 5, 31 | Bytes = 1 << 6, 32 | } Flags; 33 | 34 | typedef struct { 35 | char flags; 36 | } Delimiter; 37 | 38 | static inline Delimiter new_delimiter() { return (Delimiter){0}; } 39 | 40 | static inline bool is_format(Delimiter *delimiter) { return delimiter->flags & Format; } 41 | 42 | static inline bool is_raw(Delimiter *delimiter) { return delimiter->flags & Raw; } 43 | 44 | static inline bool is_triple(Delimiter *delimiter) { return delimiter->flags & Triple; } 45 | 46 | static inline bool is_bytes(Delimiter *delimiter) { return delimiter->flags & Bytes; } 47 | 48 | static inline int32_t end_character(Delimiter *delimiter) { 49 | if (delimiter->flags & SingleQuote) { 50 | return '\''; 51 | } 52 | if (delimiter->flags & DoubleQuote) { 53 | return '"'; 54 | } 55 | if (delimiter->flags & BackQuote) { 56 | return '`'; 57 | } 58 | return 0; 59 | } 60 | 61 | static inline void set_format(Delimiter *delimiter) { delimiter->flags |= Format; } 62 | 63 | static inline void set_raw(Delimiter *delimiter) { delimiter->flags |= Raw; } 64 | 65 | static inline void 
set_triple(Delimiter *delimiter) { delimiter->flags |= Triple; } 66 | 67 | static inline void set_bytes(Delimiter *delimiter) { delimiter->flags |= Bytes; } 68 | 69 | static inline void set_end_character(Delimiter *delimiter, int32_t character) { 70 | switch (character) { 71 | case '\'': 72 | delimiter->flags |= SingleQuote; 73 | break; 74 | case '"': 75 | delimiter->flags |= DoubleQuote; 76 | break; 77 | case '`': 78 | delimiter->flags |= BackQuote; 79 | break; 80 | default: 81 | assert(false); 82 | } 83 | } 84 | 85 | typedef struct { 86 | Array(uint16_t) indents; 87 | Array(Delimiter) delimiters; 88 | bool inside_f_string; 89 | } Scanner; 90 | 91 | static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } 92 | 93 | static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } 94 | 95 | bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { 96 | Scanner *scanner = (Scanner *)payload; 97 | 98 | bool error_recovery_mode = valid_symbols[STRING_CONTENT] && valid_symbols[INDENT]; 99 | bool within_brackets = valid_symbols[CLOSE_BRACE] || valid_symbols[CLOSE_PAREN] || valid_symbols[CLOSE_BRACKET]; 100 | 101 | bool advanced_once = false; 102 | if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 && 103 | (lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) { 104 | Delimiter *delimiter = array_back(&scanner->delimiters); 105 | if (is_format(delimiter)) { 106 | lexer->mark_end(lexer); 107 | bool is_left_brace = lexer->lookahead == '{'; 108 | advance(lexer); 109 | advanced_once = true; 110 | if ((lexer->lookahead == '{' && is_left_brace) || (lexer->lookahead == '}' && !is_left_brace)) { 111 | advance(lexer); 112 | lexer->mark_end(lexer); 113 | lexer->result_symbol = ESCAPE_INTERPOLATION; 114 | return true; 115 | } 116 | return false; 117 | } 118 | } 119 | 120 | if (valid_symbols[STRING_CONTENT] && scanner->delimiters.size > 0 && !error_recovery_mode) { 
121 | Delimiter *delimiter = array_back(&scanner->delimiters); 122 | int32_t end_char = end_character(delimiter); 123 | bool has_content = advanced_once; 124 | while (lexer->lookahead) { 125 | if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) { 126 | lexer->mark_end(lexer); 127 | lexer->result_symbol = STRING_CONTENT; 128 | return has_content; 129 | } 130 | if (lexer->lookahead == '\\') { 131 | if (is_raw(delimiter)) { 132 | // Step over the backslash. 133 | advance(lexer); 134 | // Step over any escaped quotes. 135 | if (lexer->lookahead == end_character(delimiter) || lexer->lookahead == '\\') { 136 | advance(lexer); 137 | } 138 | // Step over newlines 139 | if (lexer->lookahead == '\r') { 140 | advance(lexer); 141 | if (lexer->lookahead == '\n') { 142 | advance(lexer); 143 | } 144 | } else if (lexer->lookahead == '\n') { 145 | advance(lexer); 146 | } 147 | continue; 148 | } 149 | if (is_bytes(delimiter)) { 150 | lexer->mark_end(lexer); 151 | advance(lexer); 152 | if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') { 153 | // In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are 154 | // not escape sequences 155 | // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals 156 | advance(lexer); 157 | } else { 158 | lexer->result_symbol = STRING_CONTENT; 159 | return has_content; 160 | } 161 | } else { 162 | lexer->mark_end(lexer); 163 | lexer->result_symbol = STRING_CONTENT; 164 | return has_content; 165 | } 166 | } else if (lexer->lookahead == end_char) { 167 | if (is_triple(delimiter)) { 168 | lexer->mark_end(lexer); 169 | advance(lexer); 170 | if (lexer->lookahead == end_char) { 171 | advance(lexer); 172 | if (lexer->lookahead == end_char) { 173 | if (has_content) { 174 | lexer->result_symbol = STRING_CONTENT; 175 | } else { 176 | advance(lexer); 177 | lexer->mark_end(lexer); 178 | array_pop(&scanner->delimiters); 179 | lexer->result_symbol = STRING_END; 180 
| scanner->inside_f_string = false; 181 | } 182 | return true; 183 | } 184 | lexer->mark_end(lexer); 185 | lexer->result_symbol = STRING_CONTENT; 186 | return true; 187 | } 188 | lexer->mark_end(lexer); 189 | lexer->result_symbol = STRING_CONTENT; 190 | return true; 191 | } 192 | if (has_content) { 193 | lexer->result_symbol = STRING_CONTENT; 194 | } else { 195 | advance(lexer); 196 | array_pop(&scanner->delimiters); 197 | lexer->result_symbol = STRING_END; 198 | scanner->inside_f_string = false; 199 | } 200 | lexer->mark_end(lexer); 201 | return true; 202 | 203 | } else if (lexer->lookahead == '\n' && has_content && !is_triple(delimiter)) { 204 | return false; 205 | } 206 | advance(lexer); 207 | has_content = true; 208 | } 209 | } 210 | 211 | lexer->mark_end(lexer); 212 | 213 | bool found_end_of_line = false; 214 | uint16_t indent_length = 0; 215 | int32_t first_comment_indent_length = -1; 216 | for (;;) { 217 | if (lexer->lookahead == '\n') { 218 | found_end_of_line = true; 219 | indent_length = 0; 220 | skip(lexer); 221 | } else if (lexer->lookahead == ' ') { 222 | indent_length++; 223 | skip(lexer); 224 | } else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') { 225 | indent_length = 0; 226 | skip(lexer); 227 | } else if (lexer->lookahead == '\t') { 228 | indent_length += 8; 229 | skip(lexer); 230 | } else if (lexer->lookahead == '#' && (valid_symbols[INDENT] || valid_symbols[DEDENT] || 231 | valid_symbols[NEWLINE] || valid_symbols[EXCEPT])) { 232 | // If we haven't found an EOL yet, 233 | // then this is a comment after an expression: 234 | // foo = bar # comment 235 | // Just return, since we don't want to generate an indent/dedent 236 | // token. 
237 | if (!found_end_of_line) { 238 | return false; 239 | } 240 | if (first_comment_indent_length == -1) { 241 | first_comment_indent_length = (int32_t)indent_length; 242 | } 243 | while (lexer->lookahead && lexer->lookahead != '\n') { 244 | skip(lexer); 245 | } 246 | skip(lexer); 247 | indent_length = 0; 248 | } else if (lexer->lookahead == '\\') { 249 | skip(lexer); 250 | if (lexer->lookahead == '\r') { 251 | skip(lexer); 252 | } 253 | if (lexer->lookahead == '\n' || lexer->eof(lexer)) { 254 | skip(lexer); 255 | } else { 256 | return false; 257 | } 258 | } else if (lexer->eof(lexer)) { 259 | indent_length = 0; 260 | found_end_of_line = true; 261 | break; 262 | } else { 263 | break; 264 | } 265 | } 266 | 267 | if (found_end_of_line) { 268 | if (scanner->indents.size > 0) { 269 | uint16_t current_indent_length = *array_back(&scanner->indents); 270 | 271 | if (valid_symbols[INDENT] && indent_length > current_indent_length) { 272 | array_push(&scanner->indents, indent_length); 273 | lexer->result_symbol = INDENT; 274 | return true; 275 | } 276 | 277 | bool next_tok_is_string_start = 278 | lexer->lookahead == '\"' || lexer->lookahead == '\'' || lexer->lookahead == '`'; 279 | 280 | if ((valid_symbols[DEDENT] || 281 | (!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) && 282 | !within_brackets)) && 283 | indent_length < current_indent_length && !scanner->inside_f_string && 284 | 285 | // Wait to create a dedent token until we've consumed any 286 | // comments 287 | // whose indentation matches the current block. 
288 | first_comment_indent_length < (int32_t)current_indent_length) { 289 | array_pop(&scanner->indents); 290 | lexer->result_symbol = DEDENT; 291 | return true; 292 | } 293 | } 294 | 295 | if (valid_symbols[NEWLINE] && !error_recovery_mode) { 296 | lexer->result_symbol = NEWLINE; 297 | return true; 298 | } 299 | } 300 | 301 | if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) { 302 | Delimiter delimiter = new_delimiter(); 303 | 304 | bool has_flags = false; 305 | while (lexer->lookahead) { 306 | if (lexer->lookahead == 'f' || lexer->lookahead == 'F') { 307 | set_format(&delimiter); 308 | } else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') { 309 | set_raw(&delimiter); 310 | } else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') { 311 | set_bytes(&delimiter); 312 | } else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') { 313 | break; 314 | } 315 | has_flags = true; 316 | advance(lexer); 317 | } 318 | 319 | if (lexer->lookahead == '`') { 320 | set_end_character(&delimiter, '`'); 321 | advance(lexer); 322 | lexer->mark_end(lexer); 323 | } else if (lexer->lookahead == '\'') { 324 | set_end_character(&delimiter, '\''); 325 | advance(lexer); 326 | lexer->mark_end(lexer); 327 | if (lexer->lookahead == '\'') { 328 | advance(lexer); 329 | if (lexer->lookahead == '\'') { 330 | advance(lexer); 331 | lexer->mark_end(lexer); 332 | set_triple(&delimiter); 333 | } 334 | } 335 | } else if (lexer->lookahead == '"') { 336 | set_end_character(&delimiter, '"'); 337 | advance(lexer); 338 | lexer->mark_end(lexer); 339 | if (lexer->lookahead == '"') { 340 | advance(lexer); 341 | if (lexer->lookahead == '"') { 342 | advance(lexer); 343 | lexer->mark_end(lexer); 344 | set_triple(&delimiter); 345 | } 346 | } 347 | } 348 | 349 | if (end_character(&delimiter)) { 350 | array_push(&scanner->delimiters, delimiter); 351 | lexer->result_symbol = STRING_START; 352 | scanner->inside_f_string = is_format(&delimiter); 353 | return true; 354 | } 355 | if 
(has_flags) { 356 | return false; 357 | } 358 | } 359 | 360 | return false; 361 | } 362 | 363 | unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buffer) { 364 | Scanner *scanner = (Scanner *)payload; 365 | 366 | size_t size = 0; 367 | 368 | buffer[size++] = (char)scanner->inside_f_string; 369 | 370 | size_t delimiter_count = scanner->delimiters.size; 371 | if (delimiter_count > UINT8_MAX) { 372 | delimiter_count = UINT8_MAX; 373 | } 374 | buffer[size++] = (char)delimiter_count; 375 | 376 | if (delimiter_count > 0) { 377 | memcpy(&buffer[size], scanner->delimiters.contents, delimiter_count); 378 | } 379 | size += delimiter_count; 380 | 381 | uint32_t iter = 1; 382 | for (; iter < scanner->indents.size && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) { 383 | uint16_t indent_value = *array_get(&scanner->indents, iter); 384 | buffer[size++] = (char)(indent_value & 0xFF); 385 | buffer[size++] = (char)((indent_value >> 8) & 0xFF); 386 | } 387 | 388 | return size; 389 | } 390 | 391 | void tree_sitter_python_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { 392 | Scanner *scanner = (Scanner *)payload; 393 | 394 | array_delete(&scanner->delimiters); 395 | array_delete(&scanner->indents); 396 | array_push(&scanner->indents, 0); 397 | 398 | if (length > 0) { 399 | size_t size = 0; 400 | 401 | scanner->inside_f_string = (bool)buffer[size++]; 402 | 403 | size_t delimiter_count = (uint8_t)buffer[size++]; 404 | if (delimiter_count > 0) { 405 | array_reserve(&scanner->delimiters, delimiter_count); 406 | scanner->delimiters.size = delimiter_count; 407 | memcpy(scanner->delimiters.contents, &buffer[size], delimiter_count); 408 | size += delimiter_count; 409 | } 410 | 411 | for (; size + 1 < length; size += 2) { 412 | uint16_t indent_value = (unsigned char)buffer[size] | ((unsigned char)buffer[size + 1] << 8); 413 | array_push(&scanner->indents, indent_value); 414 | } 415 | } 416 | } 417 | 418 | void 
*tree_sitter_python_external_scanner_create() { 419 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) 420 | _Static_assert(sizeof(Delimiter) == sizeof(char), ""); 421 | #else 422 | assert(sizeof(Delimiter) == sizeof(char)); 423 | #endif 424 | Scanner *scanner = calloc(1, sizeof(Scanner)); 425 | array_init(&scanner->indents); 426 | array_init(&scanner->delimiters); 427 | tree_sitter_python_external_scanner_deserialize(scanner, NULL, 0); 428 | return scanner; 429 | } 430 | 431 | void tree_sitter_python_external_scanner_destroy(void *payload) { 432 | Scanner *scanner = (Scanner *)payload; 433 | array_delete(&scanner->indents); 434 | array_delete(&scanner->delimiters); 435 | free(scanner); 436 | } 437 | -------------------------------------------------------------------------------- /src/tree_sitter/alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ALLOC_H_ 2 | #define TREE_SITTER_ALLOC_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | // Allow clients to override allocation functions 13 | #ifdef TREE_SITTER_REUSE_ALLOCATOR 14 | 15 | extern void *(*ts_current_malloc)(size_t size); 16 | extern void *(*ts_current_calloc)(size_t count, size_t size); 17 | extern void *(*ts_current_realloc)(void *ptr, size_t size); 18 | extern void (*ts_current_free)(void *ptr); 19 | 20 | #ifndef ts_malloc 21 | #define ts_malloc ts_current_malloc 22 | #endif 23 | #ifndef ts_calloc 24 | #define ts_calloc ts_current_calloc 25 | #endif 26 | #ifndef ts_realloc 27 | #define ts_realloc ts_current_realloc 28 | #endif 29 | #ifndef ts_free 30 | #define ts_free ts_current_free 31 | #endif 32 | 33 | #else 34 | 35 | #ifndef ts_malloc 36 | #define ts_malloc malloc 37 | #endif 38 | #ifndef ts_calloc 39 | #define ts_calloc calloc 40 | #endif 41 | #ifndef ts_realloc 42 | #define ts_realloc realloc 43 | #endif 44 | #ifndef ts_free 45 | #define ts_free free 46 | #endif 
47 | 48 | #endif 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | #endif // TREE_SITTER_ALLOC_H_ 55 | -------------------------------------------------------------------------------- /src/tree_sitter/array.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ARRAY_H_ 2 | #define TREE_SITTER_ARRAY_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./alloc.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef _MSC_VER 17 | #pragma warning(push) 18 | #pragma warning(disable : 4101) 19 | #elif defined(__GNUC__) || defined(__clang__) 20 | #pragma GCC diagnostic push 21 | #pragma GCC diagnostic ignored "-Wunused-variable" 22 | #endif 23 | 24 | #define Array(T) \ 25 | struct { \ 26 | T *contents; \ 27 | uint32_t size; \ 28 | uint32_t capacity; \ 29 | } 30 | 31 | /// Initialize an array. 32 | #define array_init(self) \ 33 | ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) 34 | 35 | /// Create an empty array. 36 | #define array_new() \ 37 | { NULL, 0, 0 } 38 | 39 | /// Get a pointer to the element at a given `index` in the array. 40 | #define array_get(self, _index) \ 41 | (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) 42 | 43 | /// Get a pointer to the first element in the array. 44 | #define array_front(self) array_get(self, 0) 45 | 46 | /// Get a pointer to the last element in the array. 47 | #define array_back(self) array_get(self, (self)->size - 1) 48 | 49 | /// Clear the array, setting its size to zero. Note that this does not free any 50 | /// memory allocated for the array's contents. 51 | #define array_clear(self) ((self)->size = 0) 52 | 53 | /// Reserve `new_capacity` elements of space in the array. If `new_capacity` is 54 | /// less than the array's current capacity, this function has no effect. 
55 | #define array_reserve(self, new_capacity) \ 56 | _array__reserve((Array *)(self), array_elem_size(self), new_capacity) 57 | 58 | /// Free any memory allocated for this array. Note that this does not free any 59 | /// memory allocated for the array's contents. 60 | #define array_delete(self) _array__delete((Array *)(self)) 61 | 62 | /// Push a new `element` onto the end of the array. 63 | #define array_push(self, element) \ 64 | (_array__grow((Array *)(self), 1, array_elem_size(self)), \ 65 | (self)->contents[(self)->size++] = (element)) 66 | 67 | /// Increase the array's size by `count` elements. 68 | /// New elements are zero-initialized. 69 | #define array_grow_by(self, count) \ 70 | do { \ 71 | if ((count) == 0) break; \ 72 | _array__grow((Array *)(self), count, array_elem_size(self)); \ 73 | memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ 74 | (self)->size += (count); \ 75 | } while (0) 76 | 77 | /// Append all elements from one array to the end of another. 78 | #define array_push_all(self, other) \ 79 | array_extend((self), (other)->size, (other)->contents) 80 | 81 | /// Append `count` elements to the end of the array, reading their values from the 82 | /// `contents` pointer. 83 | #define array_extend(self, count, contents) \ 84 | _array__splice( \ 85 | (Array *)(self), array_elem_size(self), (self)->size, \ 86 | 0, count, contents \ 87 | ) 88 | 89 | /// Remove `old_count` elements from the array starting at the given `index`. At 90 | /// the same index, insert `new_count` new elements, reading their values from the 91 | /// `new_contents` pointer. 92 | #define array_splice(self, _index, old_count, new_count, new_contents) \ 93 | _array__splice( \ 94 | (Array *)(self), array_elem_size(self), _index, \ 95 | old_count, new_count, new_contents \ 96 | ) 97 | 98 | /// Insert one `element` into the array at the given `index`. 
99 | #define array_insert(self, _index, element) \ 100 | _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) 101 | 102 | /// Remove one element from the array at the given `index`. 103 | #define array_erase(self, _index) \ 104 | _array__erase((Array *)(self), array_elem_size(self), _index) 105 | 106 | /// Pop the last element off the array, returning the element by value. 107 | #define array_pop(self) ((self)->contents[--(self)->size]) 108 | 109 | /// Assign the contents of one array to another, reallocating if necessary. 110 | #define array_assign(self, other) \ 111 | _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) 112 | 113 | /// Swap one array with another 114 | #define array_swap(self, other) \ 115 | _array__swap((Array *)(self), (Array *)(other)) 116 | 117 | /// Get the size of the array contents 118 | #define array_elem_size(self) (sizeof *(self)->contents) 119 | 120 | /// Search a sorted array for a given `needle` value, using the given `compare` 121 | /// callback to determine the order. 122 | /// 123 | /// If an existing element is found to be equal to `needle`, then the `index` 124 | /// out-parameter is set to the existing value's index, and the `exists` 125 | /// out-parameter is set to true. Otherwise, `index` is set to an index where 126 | /// `needle` should be inserted in order to preserve the sorting, and `exists` 127 | /// is set to false. 128 | #define array_search_sorted_with(self, compare, needle, _index, _exists) \ 129 | _array__search_sorted(self, 0, compare, , needle, _index, _exists) 130 | 131 | /// Search a sorted array for a given `needle` value, using integer comparisons 132 | /// of a given struct field (specified with a leading dot) to determine the order. 133 | /// 134 | /// See also `array_search_sorted_with`. 
135 | #define array_search_sorted_by(self, field, needle, _index, _exists) \ 136 | _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) 137 | 138 | /// Insert a given `value` into a sorted array, using the given `compare` 139 | /// callback to determine the order. 140 | #define array_insert_sorted_with(self, compare, value) \ 141 | do { \ 142 | unsigned _index, _exists; \ 143 | array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ 144 | if (!_exists) array_insert(self, _index, value); \ 145 | } while (0) 146 | 147 | /// Insert a given `value` into a sorted array, using integer comparisons of 148 | /// a given struct field (specified with a leading dot) to determine the order. 149 | /// 150 | /// See also `array_search_sorted_by`. 151 | #define array_insert_sorted_by(self, field, value) \ 152 | do { \ 153 | unsigned _index, _exists; \ 154 | array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ 155 | if (!_exists) array_insert(self, _index, value); \ 156 | } while (0) 157 | 158 | // Private 159 | 160 | typedef Array(void) Array; 161 | 162 | /// This is not what you're looking for, see `array_delete`. 163 | static inline void _array__delete(Array *self) { 164 | if (self->contents) { 165 | ts_free(self->contents); 166 | self->contents = NULL; 167 | self->size = 0; 168 | self->capacity = 0; 169 | } 170 | } 171 | 172 | /// This is not what you're looking for, see `array_erase`. 173 | static inline void _array__erase(Array *self, size_t element_size, 174 | uint32_t index) { 175 | assert(index < self->size); 176 | char *contents = (char *)self->contents; 177 | memmove(contents + index * element_size, contents + (index + 1) * element_size, 178 | (self->size - index - 1) * element_size); 179 | self->size--; 180 | } 181 | 182 | /// This is not what you're looking for, see `array_reserve`. 
183 | static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { 184 | if (new_capacity > self->capacity) { 185 | if (self->contents) { 186 | self->contents = ts_realloc(self->contents, new_capacity * element_size); 187 | } else { 188 | self->contents = ts_malloc(new_capacity * element_size); 189 | } 190 | self->capacity = new_capacity; 191 | } 192 | } 193 | 194 | /// This is not what you're looking for, see `array_assign`. 195 | static inline void _array__assign(Array *self, const Array *other, size_t element_size) { 196 | _array__reserve(self, element_size, other->size); 197 | self->size = other->size; 198 | memcpy(self->contents, other->contents, self->size * element_size); 199 | } 200 | 201 | /// This is not what you're looking for, see `array_swap`. 202 | static inline void _array__swap(Array *self, Array *other) { 203 | Array swap = *other; 204 | *other = *self; 205 | *self = swap; 206 | } 207 | 208 | /// This is not what you're looking for, see `array_push` or `array_grow_by`. 209 | static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { 210 | uint32_t new_size = self->size + count; 211 | if (new_size > self->capacity) { 212 | uint32_t new_capacity = self->capacity * 2; 213 | if (new_capacity < 8) new_capacity = 8; 214 | if (new_capacity < new_size) new_capacity = new_size; 215 | _array__reserve(self, element_size, new_capacity); 216 | } 217 | } 218 | 219 | /// This is not what you're looking for, see `array_splice`. 
220 | static inline void _array__splice(Array *self, size_t element_size, 221 | uint32_t index, uint32_t old_count, 222 | uint32_t new_count, const void *elements) { 223 | uint32_t new_size = self->size + new_count - old_count; 224 | uint32_t old_end = index + old_count; 225 | uint32_t new_end = index + new_count; 226 | assert(old_end <= self->size); 227 | 228 | _array__reserve(self, element_size, new_size); 229 | 230 | char *contents = (char *)self->contents; 231 | if (self->size > old_end) { 232 | memmove( 233 | contents + new_end * element_size, 234 | contents + old_end * element_size, 235 | (self->size - old_end) * element_size 236 | ); 237 | } 238 | if (new_count > 0) { 239 | if (elements) { 240 | memcpy( 241 | (contents + index * element_size), 242 | elements, 243 | new_count * element_size 244 | ); 245 | } else { 246 | memset( 247 | (contents + index * element_size), 248 | 0, 249 | new_count * element_size 250 | ); 251 | } 252 | } 253 | self->size += new_count - old_count; 254 | } 255 | 256 | /// A binary search routine, based on Rust's `std::slice::binary_search_by`. 257 | /// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
258 | #define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ 259 | do { \ 260 | *(_index) = start; \ 261 | *(_exists) = false; \ 262 | uint32_t size = (self)->size - *(_index); \ 263 | if (size == 0) break; \ 264 | int comparison; \ 265 | while (size > 1) { \ 266 | uint32_t half_size = size / 2; \ 267 | uint32_t mid_index = *(_index) + half_size; \ 268 | comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ 269 | if (comparison <= 0) *(_index) = mid_index; \ 270 | size -= half_size; \ 271 | } \ 272 | comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ 273 | if (comparison == 0) *(_exists) = true; \ 274 | else if (comparison < 0) *(_index) += 1; \ 275 | } while (0) 276 | 277 | /// Helper macro for the `_sorted_by` routines below. This takes the left (existing) 278 | /// parameter by reference in order to work with the generic sorting function above. 279 | #define _compare_int(a, b) ((int)*(a) - (int)(b)) 280 | 281 | #ifdef _MSC_VER 282 | #pragma warning(pop) 283 | #elif defined(__GNUC__) || defined(__clang__) 284 | #pragma GCC diagnostic pop 285 | #endif 286 | 287 | #ifdef __cplusplus 288 | } 289 | #endif 290 | 291 | #endif // TREE_SITTER_ARRAY_H_ 292 | -------------------------------------------------------------------------------- /src/tree_sitter/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_PARSER_H_ 2 | #define TREE_SITTER_PARSER_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #define ts_builtin_sym_error ((TSSymbol)-1) 13 | #define ts_builtin_sym_end 0 14 | #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 15 | 16 | #ifndef TREE_SITTER_API_H_ 17 | typedef uint16_t TSStateId; 18 | typedef uint16_t TSSymbol; 19 | typedef uint16_t TSFieldId; 20 | typedef struct TSLanguage TSLanguage; 21 | #endif 22 | 23 | typedef struct { 24 | TSFieldId field_id; 25 | uint8_t 
child_index; 26 | bool inherited; 27 | } TSFieldMapEntry; 28 | 29 | typedef struct { 30 | uint16_t index; 31 | uint16_t length; 32 | } TSFieldMapSlice; 33 | 34 | typedef struct { 35 | bool visible; 36 | bool named; 37 | bool supertype; 38 | } TSSymbolMetadata; 39 | 40 | typedef struct TSLexer TSLexer; 41 | 42 | struct TSLexer { 43 | int32_t lookahead; 44 | TSSymbol result_symbol; 45 | void (*advance)(TSLexer *, bool); 46 | void (*mark_end)(TSLexer *); 47 | uint32_t (*get_column)(TSLexer *); 48 | bool (*is_at_included_range_start)(const TSLexer *); 49 | bool (*eof)(const TSLexer *); 50 | void (*log)(const TSLexer *, const char *, ...); 51 | }; 52 | 53 | typedef enum { 54 | TSParseActionTypeShift, 55 | TSParseActionTypeReduce, 56 | TSParseActionTypeAccept, 57 | TSParseActionTypeRecover, 58 | } TSParseActionType; 59 | 60 | typedef union { 61 | struct { 62 | uint8_t type; 63 | TSStateId state; 64 | bool extra; 65 | bool repetition; 66 | } shift; 67 | struct { 68 | uint8_t type; 69 | uint8_t child_count; 70 | TSSymbol symbol; 71 | int16_t dynamic_precedence; 72 | uint16_t production_id; 73 | } reduce; 74 | uint8_t type; 75 | } TSParseAction; 76 | 77 | typedef struct { 78 | uint16_t lex_state; 79 | uint16_t external_lex_state; 80 | } TSLexMode; 81 | 82 | typedef union { 83 | TSParseAction action; 84 | struct { 85 | uint8_t count; 86 | bool reusable; 87 | } entry; 88 | } TSParseActionEntry; 89 | 90 | typedef struct { 91 | int32_t start; 92 | int32_t end; 93 | } TSCharacterRange; 94 | 95 | struct TSLanguage { 96 | uint32_t version; 97 | uint32_t symbol_count; 98 | uint32_t alias_count; 99 | uint32_t token_count; 100 | uint32_t external_token_count; 101 | uint32_t state_count; 102 | uint32_t large_state_count; 103 | uint32_t production_id_count; 104 | uint32_t field_count; 105 | uint16_t max_alias_sequence_length; 106 | const uint16_t *parse_table; 107 | const uint16_t *small_parse_table; 108 | const uint32_t *small_parse_table_map; 109 | const TSParseActionEntry 
*parse_actions; 110 | const char * const *symbol_names; 111 | const char * const *field_names; 112 | const TSFieldMapSlice *field_map_slices; 113 | const TSFieldMapEntry *field_map_entries; 114 | const TSSymbolMetadata *symbol_metadata; 115 | const TSSymbol *public_symbol_map; 116 | const uint16_t *alias_map; 117 | const TSSymbol *alias_sequences; 118 | const TSLexMode *lex_modes; 119 | bool (*lex_fn)(TSLexer *, TSStateId); 120 | bool (*keyword_lex_fn)(TSLexer *, TSStateId); 121 | TSSymbol keyword_capture_token; 122 | struct { 123 | const bool *states; 124 | const TSSymbol *symbol_map; 125 | void *(*create)(void); 126 | void (*destroy)(void *); 127 | bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); 128 | unsigned (*serialize)(void *, char *); 129 | void (*deserialize)(void *, const char *, unsigned); 130 | } external_scanner; 131 | const TSStateId *primary_state_ids; 132 | }; 133 | 134 | static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { 135 | uint32_t index = 0; 136 | uint32_t size = len - index; 137 | while (size > 1) { 138 | uint32_t half_size = size / 2; 139 | uint32_t mid_index = index + half_size; 140 | TSCharacterRange *range = &ranges[mid_index]; 141 | if (lookahead >= range->start && lookahead <= range->end) { 142 | return true; 143 | } else if (lookahead > range->end) { 144 | index = mid_index; 145 | } 146 | size -= half_size; 147 | } 148 | TSCharacterRange *range = &ranges[index]; 149 | return (lookahead >= range->start && lookahead <= range->end); 150 | } 151 | 152 | /* 153 | * Lexer Macros 154 | */ 155 | 156 | #ifdef _MSC_VER 157 | #define UNUSED __pragma(warning(suppress : 4101)) 158 | #else 159 | #define UNUSED __attribute__((unused)) 160 | #endif 161 | 162 | #define START_LEXER() \ 163 | bool result = false; \ 164 | bool skip = false; \ 165 | UNUSED \ 166 | bool eof = false; \ 167 | int32_t lookahead; \ 168 | goto start; \ 169 | next_state: \ 170 | lexer->advance(lexer, skip); \ 171 | start: \ 
172 | skip = false; \ 173 | lookahead = lexer->lookahead; 174 | 175 | #define ADVANCE(state_value) \ 176 | { \ 177 | state = state_value; \ 178 | goto next_state; \ 179 | } 180 | 181 | #define ADVANCE_MAP(...) \ 182 | { \ 183 | static const uint16_t map[] = { __VA_ARGS__ }; \ 184 | for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ 185 | if (map[i] == lookahead) { \ 186 | state = map[i + 1]; \ 187 | goto next_state; \ 188 | } \ 189 | } \ 190 | } 191 | 192 | #define SKIP(state_value) \ 193 | { \ 194 | skip = true; \ 195 | state = state_value; \ 196 | goto next_state; \ 197 | } 198 | 199 | #define ACCEPT_TOKEN(symbol_value) \ 200 | result = true; \ 201 | lexer->result_symbol = symbol_value; \ 202 | lexer->mark_end(lexer); 203 | 204 | #define END_STATE() return result; 205 | 206 | /* 207 | * Parse Table Macros 208 | */ 209 | 210 | #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) 211 | 212 | #define STATE(id) id 213 | 214 | #define ACTIONS(id) id 215 | 216 | #define SHIFT(state_value) \ 217 | {{ \ 218 | .shift = { \ 219 | .type = TSParseActionTypeShift, \ 220 | .state = (state_value) \ 221 | } \ 222 | }} 223 | 224 | #define SHIFT_REPEAT(state_value) \ 225 | {{ \ 226 | .shift = { \ 227 | .type = TSParseActionTypeShift, \ 228 | .state = (state_value), \ 229 | .repetition = true \ 230 | } \ 231 | }} 232 | 233 | #define SHIFT_EXTRA() \ 234 | {{ \ 235 | .shift = { \ 236 | .type = TSParseActionTypeShift, \ 237 | .extra = true \ 238 | } \ 239 | }} 240 | 241 | #define REDUCE(symbol_name, children, precedence, prod_id) \ 242 | {{ \ 243 | .reduce = { \ 244 | .type = TSParseActionTypeReduce, \ 245 | .symbol = symbol_name, \ 246 | .child_count = children, \ 247 | .dynamic_precedence = precedence, \ 248 | .production_id = prod_id \ 249 | }, \ 250 | }} 251 | 252 | #define RECOVER() \ 253 | {{ \ 254 | .type = TSParseActionTypeRecover \ 255 | }} 256 | 257 | #define ACCEPT_INPUT() \ 258 | {{ \ 259 | .type = TSParseActionTypeAccept \ 260 | }} 261 | 262 | #ifdef 
__cplusplus 263 | } 264 | #endif 265 | 266 | #endif // TREE_SITTER_PARSER_H_ 267 | -------------------------------------------------------------------------------- /test/corpus/errors.txt: -------------------------------------------------------------------------------- 1 | ==================================== 2 | An error before a string literal 3 | :error 4 | ==================================== 5 | 6 | def a(b): 7 | c. 8 | 9 | """ 10 | d 11 | """ 12 | 13 | e 14 | 15 | --- 16 | 17 | (module 18 | (function_definition 19 | (identifier) 20 | (parameters 21 | (identifier)) 22 | (ERROR 23 | (identifier)) 24 | (block 25 | (expression_statement 26 | (string 27 | (string_start) 28 | (string_content) 29 | (string_end))) 30 | (expression_statement 31 | (identifier))))) 32 | -------------------------------------------------------------------------------- /test/corpus/literals.txt: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | Integers 3 | ================================================================================ 4 | 5 | -1 6 | 0xDEAD 7 | 0XDEAD 8 | 1j 9 | -1j 10 | 0o123 11 | 0O123 12 | 0b001 13 | 0B001 14 | 1_1 15 | 0B1_1 16 | 0O1_1 17 | 0L 18 | 19 | -------------------------------------------------------------------------------- 20 | 21 | (module 22 | (expression_statement 23 | (unary_operator 24 | (integer))) 25 | (expression_statement 26 | (integer)) 27 | (expression_statement 28 | (integer)) 29 | (expression_statement 30 | (integer)) 31 | (expression_statement 32 | (unary_operator 33 | (integer))) 34 | (expression_statement 35 | (integer)) 36 | (expression_statement 37 | (integer)) 38 | (expression_statement 39 | (integer)) 40 | (expression_statement 41 | (integer)) 42 | (expression_statement 43 | (integer)) 44 | (expression_statement 45 | (integer)) 46 | (expression_statement 47 | (integer)) 48 | (expression_statement 49 | (integer))) 50 | 51 
| ================================================================================ 52 | Floats 53 | ================================================================================ 54 | 55 | -.6_6 56 | +.1_1 57 | 123.4123 58 | 123.123J 59 | 1_1.3_1 60 | 1_1. 61 | 1e+3_4j 62 | .3e1_4 63 | 64 | -------------------------------------------------------------------------------- 65 | 66 | (module 67 | (expression_statement 68 | (unary_operator 69 | (float))) 70 | (expression_statement 71 | (unary_operator 72 | (float))) 73 | (expression_statement 74 | (float)) 75 | (expression_statement 76 | (float)) 77 | (expression_statement 78 | (float)) 79 | (expression_statement 80 | (float)) 81 | (expression_statement 82 | (float)) 83 | (expression_statement 84 | (float))) 85 | 86 | ================================================================================ 87 | Scientific Notation Floats 88 | ================================================================================ 89 | 90 | 1e322 91 | 1e-3 92 | 1e+3 93 | 1.8e10 94 | 1.e10 95 | -1e10 96 | 97 | -------------------------------------------------------------------------------- 98 | 99 | (module 100 | (expression_statement 101 | (float)) 102 | (expression_statement 103 | (float)) 104 | (expression_statement 105 | (float)) 106 | (expression_statement 107 | (float)) 108 | (expression_statement 109 | (float)) 110 | (expression_statement 111 | (unary_operator 112 | (float)))) 113 | 114 | ================================================================================ 115 | Strings 116 | ================================================================================ 117 | 118 | "I'm ok" 119 | '"ok"' 120 | UR'bye' 121 | b'sup' 122 | B"sup" 123 | `1` 124 | "\\" 125 | "/" 126 | "multiline \ 127 | string" 128 | b"\x12\u12\U12\x13\N{WINKING FACE}" 129 | "\xab\1\12\123\'\"\a\b\f\r\n\t\v\\" 130 | "\xgh\o123\p\q\c\d\e\u12\U1234" 131 | f'\N{GREEK CAPITAL LETTER DELTA}' 132 | 133 | 
-------------------------------------------------------------------------------- 134 | 135 | (module 136 | (expression_statement 137 | (string 138 | (string_start) 139 | (string_content) 140 | (string_end))) 141 | (expression_statement 142 | (string 143 | (string_start) 144 | (string_content) 145 | (string_end))) 146 | (expression_statement 147 | (string 148 | (string_start) 149 | (string_content) 150 | (string_end))) 151 | (expression_statement 152 | (string 153 | (string_start) 154 | (string_content) 155 | (string_end))) 156 | (expression_statement 157 | (string 158 | (string_start) 159 | (string_content) 160 | (string_end))) 161 | (expression_statement 162 | (string 163 | (string_start) 164 | (string_content) 165 | (string_end))) 166 | (expression_statement 167 | (string 168 | (string_start) 169 | (string_content 170 | (escape_sequence)) 171 | (string_end))) 172 | (expression_statement 173 | (string 174 | (string_start) 175 | (string_content) 176 | (string_end))) 177 | (expression_statement 178 | (string 179 | (string_start) 180 | (string_content 181 | (escape_sequence)) 182 | (string_end))) 183 | (expression_statement 184 | (string 185 | (string_start) 186 | (string_content 187 | (escape_sequence) 188 | (escape_sequence)) 189 | (string_end))) 190 | (expression_statement 191 | (string 192 | (string_start) 193 | (string_content 194 | (escape_sequence) 195 | (escape_sequence) 196 | (escape_sequence) 197 | (escape_sequence) 198 | (escape_sequence) 199 | (escape_sequence) 200 | (escape_sequence) 201 | (escape_sequence) 202 | (escape_sequence) 203 | (escape_sequence) 204 | (escape_sequence) 205 | (escape_sequence) 206 | (escape_sequence) 207 | (escape_sequence)) 208 | (string_end))) 209 | (expression_statement 210 | (string 211 | (string_start) 212 | (string_content) 213 | (string_end))) 214 | (expression_statement 215 | (string 216 | (string_start) 217 | (string_content 218 | (escape_sequence)) 219 | (string_end)))) 220 | 221 | 
================================================================================ 222 | Raw strings 223 | ================================================================================ 224 | 225 | 'ab\x00cd' 226 | "\n" 227 | 228 | # no escape sequences in these 229 | r'ab\x00cd' 230 | ur"\n" 231 | 232 | # raw f-string 233 | fr"\{0}" 234 | 235 | r"\\" 236 | r'"a\ 237 | de\ 238 | fg"' 239 | 240 | -------------------------------------------------------------------------------- 241 | 242 | (module 243 | (expression_statement 244 | (string 245 | (string_start) 246 | (string_content 247 | (escape_sequence)) 248 | (string_end))) 249 | (expression_statement 250 | (string 251 | (string_start) 252 | (string_content 253 | (escape_sequence)) 254 | (string_end))) 255 | (comment) 256 | (expression_statement 257 | (string 258 | (string_start) 259 | (string_content) 260 | (string_end))) 261 | (expression_statement 262 | (string 263 | (string_start) 264 | (string_content) 265 | (string_end))) 266 | (comment) 267 | (expression_statement 268 | (string 269 | (string_start) 270 | (string_content) 271 | (interpolation 272 | (integer)) 273 | (string_end))) 274 | (expression_statement 275 | (string 276 | (string_start) 277 | (string_end))) 278 | (expression_statement 279 | (string 280 | (string_start) 281 | (string_content) 282 | (string_end)))) 283 | 284 | ================================================================================ 285 | Raw strings with escaped quotes 286 | ================================================================================ 287 | 288 | re.compile(r"(\n|\A)#include\s*['\"]" 289 | r"(?P[\w\d./\\]+[.]src)['\"]") 290 | 291 | -------------------------------------------------------------------------------- 292 | 293 | (module 294 | (expression_statement 295 | (call 296 | (attribute 297 | (identifier) 298 | (identifier)) 299 | (argument_list 300 | (concatenated_string 301 | (string 302 | (string_start) 303 | (string_content) 304 | (string_end)) 305 | (string 
306 | (string_start) 307 | (string_content) 308 | (string_end))))))) 309 | 310 | ================================================================================ 311 | Format strings 312 | ================================================================================ 313 | 314 | # nested! 315 | f"a {b(f'c {e} d')} e" 316 | f"""a"{b}c""" 317 | f"""a""{b}c""" 318 | f"a {{}} e" 319 | f"a {b}}}" 320 | f"a {{{b}" 321 | f"a {{b}}" 322 | f"a {{{b}}}" 323 | f"{c,}" 324 | f"{yield d}" 325 | f"{*a,}" 326 | 327 | def function(): 328 | return f""" 329 | {"string1" if True else 330 | "string2"}""" 331 | 332 | def test(self): 333 | self.assertEqual(f'''A complex trick: { 334 | 2 # two 335 | }''', 'A complex trick: 2') 336 | 337 | -------------------------------------------------------------------------------- 338 | 339 | (module 340 | (comment) 341 | (expression_statement 342 | (string 343 | (string_start) 344 | (string_content) 345 | (interpolation 346 | (call 347 | (identifier) 348 | (argument_list 349 | (string 350 | (string_start) 351 | (string_content) 352 | (interpolation 353 | (identifier)) 354 | (string_content) 355 | (string_end))))) 356 | (string_content) 357 | (string_end))) 358 | (expression_statement 359 | (string 360 | (string_start) 361 | (string_content) 362 | (interpolation 363 | (identifier)) 364 | (string_content) 365 | (string_end))) 366 | (expression_statement 367 | (string 368 | (string_start) 369 | (string_content) 370 | (interpolation 371 | (identifier)) 372 | (string_content) 373 | (string_end))) 374 | (expression_statement 375 | (string 376 | (string_start) 377 | (string_content 378 | (escape_interpolation) 379 | (escape_interpolation)) 380 | (string_end))) 381 | (expression_statement 382 | (string 383 | (string_start) 384 | (string_content) 385 | (interpolation 386 | (identifier)) 387 | (string_content 388 | (escape_interpolation)) 389 | (string_end))) 390 | (expression_statement 391 | (string 392 | (string_start) 393 | (string_content 394 | 
(escape_interpolation)) 395 | (interpolation 396 | (identifier)) 397 | (string_end))) 398 | (expression_statement 399 | (string 400 | (string_start) 401 | (string_content 402 | (escape_interpolation) 403 | (escape_interpolation)) 404 | (string_end))) 405 | (expression_statement 406 | (string 407 | (string_start) 408 | (string_content 409 | (escape_interpolation)) 410 | (interpolation 411 | (identifier)) 412 | (string_content 413 | (escape_interpolation)) 414 | (string_end))) 415 | (expression_statement 416 | (string 417 | (string_start) 418 | (interpolation 419 | (expression_list 420 | (identifier))) 421 | (string_end))) 422 | (expression_statement 423 | (string 424 | (string_start) 425 | (interpolation 426 | (yield 427 | (identifier))) 428 | (string_end))) 429 | (expression_statement 430 | (string 431 | (string_start) 432 | (interpolation 433 | (expression_list 434 | (list_splat 435 | (identifier)))) 436 | (string_end))) 437 | (function_definition 438 | (identifier) 439 | (parameters) 440 | (block 441 | (return_statement 442 | (string 443 | (string_start) 444 | (string_content) 445 | (interpolation 446 | (conditional_expression 447 | (string 448 | (string_start) 449 | (string_content) 450 | (string_end)) 451 | (true) 452 | (string 453 | (string_start) 454 | (string_content) 455 | (string_end)))) 456 | (string_end))))) 457 | (function_definition 458 | (identifier) 459 | (parameters 460 | (identifier)) 461 | (block 462 | (expression_statement 463 | (call 464 | (attribute 465 | (identifier) 466 | (identifier)) 467 | (argument_list 468 | (string 469 | (string_start) 470 | (string_content) 471 | (interpolation 472 | (integer) 473 | (comment)) 474 | (string_end)) 475 | (string 476 | (string_start) 477 | (string_content) 478 | (string_end)))))))) 479 | 480 | ================================================================================ 481 | Format strings with format specifiers 482 | ================================================================================ 483 
| 484 | f"a {b:2} {c:34.5}" 485 | f"{b:{c.d}.{d.e}}" 486 | f"{a:#06x}" 487 | f"{a=}" 488 | f"{a=:.2f}" 489 | f"{value:{width + padding!r}.{precision}}" 490 | 491 | -------------------------------------------------------------------------------- 492 | 493 | (module 494 | (expression_statement 495 | (string 496 | (string_start) 497 | (string_content) 498 | (interpolation 499 | (identifier) 500 | (format_specifier)) 501 | (string_content) 502 | (interpolation 503 | (identifier) 504 | (format_specifier)) 505 | (string_end))) 506 | (expression_statement 507 | (string 508 | (string_start) 509 | (interpolation 510 | (identifier) 511 | (format_specifier 512 | (format_expression 513 | (attribute 514 | (identifier) 515 | (identifier))) 516 | (format_expression 517 | (attribute 518 | (identifier) 519 | (identifier))))) 520 | (string_end))) 521 | (expression_statement 522 | (string 523 | (string_start) 524 | (interpolation 525 | (identifier) 526 | (format_specifier)) 527 | (string_end))) 528 | (expression_statement 529 | (string 530 | (string_start) 531 | (interpolation 532 | (identifier)) 533 | (string_end))) 534 | (expression_statement 535 | (string 536 | (string_start) 537 | (interpolation 538 | (identifier) 539 | (format_specifier)) 540 | (string_end))) 541 | (expression_statement 542 | (string 543 | (string_start) 544 | (interpolation 545 | (identifier) 546 | (format_specifier 547 | (format_expression 548 | (binary_operator 549 | (identifier) 550 | (identifier)) 551 | (type_conversion)) 552 | (format_expression 553 | (identifier)))) 554 | (string_end)))) 555 | 556 | ================================================================================ 557 | Unicode escape sequences 558 | ================================================================================ 559 | 560 | "\x12 \123 \u1234" 561 | 562 | -------------------------------------------------------------------------------- 563 | 564 | (module 565 | (expression_statement 566 | (string 567 | (string_start) 568 | 
(string_content 569 | (escape_sequence) 570 | (escape_sequence) 571 | (escape_sequence)) 572 | (string_end)))) 573 | 574 | ================================================================================ 575 | Other primitives 576 | ================================================================================ 577 | 578 | True 579 | False 580 | None 581 | 582 | -------------------------------------------------------------------------------- 583 | 584 | (module 585 | (expression_statement 586 | (true)) 587 | (expression_statement 588 | (false)) 589 | (expression_statement 590 | (none))) 591 | 592 | ================================================================================ 593 | Concatenated strings 594 | ================================================================================ 595 | 596 | "one" "two" "three" 597 | 598 | -------------------------------------------------------------------------------- 599 | 600 | (module 601 | (expression_statement 602 | (concatenated_string 603 | (string 604 | (string_start) 605 | (string_content) 606 | (string_end)) 607 | (string 608 | (string_start) 609 | (string_content) 610 | (string_end)) 611 | (string 612 | (string_start) 613 | (string_content) 614 | (string_end))))) 615 | 616 | ================================================================================ 617 | Multi-line strings 618 | ================================================================================ 619 | 620 | """ 621 | A double quote hello, 622 | without double or single quotes. 623 | """ 624 | 625 | """ 626 | A double quote "hello", 627 | with double quotes. 628 | """ 629 | 630 | """ 631 | A double quote 'hello', 632 | with single quotes. 633 | """ 634 | 635 | ''' 636 | A single quote hello, 637 | without double or single quotes. 638 | ''' 639 | 640 | ''' 641 | A single quote 'hello', 642 | with single quotes. 643 | ''' 644 | 645 | ''' 646 | A single quote "hello", 647 | with double quotes. 
648 | ''' 649 | 650 | """ 651 | A double quote hello\n\ 652 | with an escaped newline\n\ 653 | and another escaped newline\n\ 654 | """ 655 | 656 | -------------------------------------------------------------------------------- 657 | 658 | (module 659 | (expression_statement 660 | (string 661 | (string_start) 662 | (string_content) 663 | (string_end))) 664 | (expression_statement 665 | (string 666 | (string_start) 667 | (string_content) 668 | (string_end))) 669 | (expression_statement 670 | (string 671 | (string_start) 672 | (string_content) 673 | (string_end))) 674 | (expression_statement 675 | (string 676 | (string_start) 677 | (string_content) 678 | (string_end))) 679 | (expression_statement 680 | (string 681 | (string_start) 682 | (string_content) 683 | (string_end))) 684 | (expression_statement 685 | (string 686 | (string_start) 687 | (string_content) 688 | (string_end))) 689 | (expression_statement 690 | (string 691 | (string_start) 692 | (string_content 693 | (escape_sequence) 694 | (escape_sequence) 695 | (escape_sequence) 696 | (escape_sequence) 697 | (escape_sequence) 698 | (escape_sequence)) 699 | (string_end)))) 700 | 701 | ================================================================================ 702 | Lists 703 | ================================================================================ 704 | 705 | [a, b, [c, d]] 706 | [*()] 707 | [*[]] 708 | [*a] 709 | [*a.b] 710 | [*a[b].c] 711 | [*a()] 712 | 713 | -------------------------------------------------------------------------------- 714 | 715 | (module 716 | (expression_statement 717 | (list 718 | (identifier) 719 | (identifier) 720 | (list 721 | (identifier) 722 | (identifier)))) 723 | (expression_statement 724 | (list 725 | (list_splat 726 | (tuple)))) 727 | (expression_statement 728 | (list 729 | (list_splat 730 | (list)))) 731 | (expression_statement 732 | (list 733 | (list_splat 734 | (identifier)))) 735 | (expression_statement 736 | (list 737 | (attribute 738 | (list_splat 739 | 
(identifier)) 740 | (identifier)))) 741 | (expression_statement 742 | (list 743 | (attribute 744 | (subscript 745 | (list_splat 746 | (identifier)) 747 | (identifier)) 748 | (identifier)))) 749 | (expression_statement 750 | (list 751 | (call 752 | (list_splat 753 | (identifier)) 754 | (argument_list))))) 755 | 756 | ================================================================================ 757 | List comprehensions 758 | ================================================================================ 759 | 760 | [a + b for (a, b) in items] 761 | [a for b in c for a in b] 762 | [(x,y) for x in [1,2,3] for y in [1,2,3] if True] 763 | [a for a in lambda: True, lambda: False if a()] 764 | 765 | -------------------------------------------------------------------------------- 766 | 767 | (module 768 | (expression_statement 769 | (list_comprehension 770 | (binary_operator 771 | (identifier) 772 | (identifier)) 773 | (for_in_clause 774 | (tuple_pattern 775 | (identifier) 776 | (identifier)) 777 | (identifier)))) 778 | (expression_statement 779 | (list_comprehension 780 | (identifier) 781 | (for_in_clause 782 | (identifier) 783 | (identifier)) 784 | (for_in_clause 785 | (identifier) 786 | (identifier)))) 787 | (expression_statement 788 | (list_comprehension 789 | (tuple 790 | (identifier) 791 | (identifier)) 792 | (for_in_clause 793 | (identifier) 794 | (list 795 | (integer) 796 | (integer) 797 | (integer))) 798 | (for_in_clause 799 | (identifier) 800 | (list 801 | (integer) 802 | (integer) 803 | (integer))) 804 | (if_clause 805 | (true)))) 806 | (expression_statement 807 | (list_comprehension 808 | (identifier) 809 | (for_in_clause 810 | (identifier) 811 | (lambda 812 | (true)) 813 | (lambda 814 | (false))) 815 | (if_clause 816 | (call 817 | (identifier) 818 | (argument_list)))))) 819 | 820 | ================================================================================ 821 | Dictionaries 822 | 
================================================================================ 823 | 824 | {a: 1, b: 2} 825 | {} 826 | {**{}} 827 | {**a} 828 | {**a.b} 829 | {**a[b].c} 830 | {**a()} 831 | 832 | -------------------------------------------------------------------------------- 833 | 834 | (module 835 | (expression_statement 836 | (dictionary 837 | (pair 838 | (identifier) 839 | (integer)) 840 | (pair 841 | (identifier) 842 | (integer)))) 843 | (expression_statement 844 | (dictionary)) 845 | (expression_statement 846 | (dictionary 847 | (dictionary_splat 848 | (dictionary)))) 849 | (expression_statement 850 | (dictionary 851 | (dictionary_splat 852 | (identifier)))) 853 | (expression_statement 854 | (dictionary 855 | (dictionary_splat 856 | (attribute 857 | (identifier) 858 | (identifier))))) 859 | (expression_statement 860 | (dictionary 861 | (dictionary_splat 862 | (attribute 863 | (subscript 864 | (identifier) 865 | (identifier)) 866 | (identifier))))) 867 | (expression_statement 868 | (dictionary 869 | (dictionary_splat 870 | (call 871 | (identifier) 872 | (argument_list)))))) 873 | 874 | ================================================================================ 875 | Dictionary comprehensions 876 | ================================================================================ 877 | 878 | {a: b for a, b in items} 879 | {a: b for c in d for e in items} 880 | 881 | -------------------------------------------------------------------------------- 882 | 883 | (module 884 | (expression_statement 885 | (dictionary_comprehension 886 | (pair 887 | (identifier) 888 | (identifier)) 889 | (for_in_clause 890 | (pattern_list 891 | (identifier) 892 | (identifier)) 893 | (identifier)))) 894 | (expression_statement 895 | (dictionary_comprehension 896 | (pair 897 | (identifier) 898 | (identifier)) 899 | (for_in_clause 900 | (identifier) 901 | (identifier)) 902 | (for_in_clause 903 | (identifier) 904 | (identifier))))) 905 | 906 | 
================================================================================ 907 | Sets 908 | ================================================================================ 909 | 910 | {a, b, c,} 911 | {*{}} 912 | 913 | -------------------------------------------------------------------------------- 914 | 915 | (module 916 | (expression_statement 917 | (set 918 | (identifier) 919 | (identifier) 920 | (identifier))) 921 | (expression_statement 922 | (set 923 | (list_splat 924 | (dictionary))))) 925 | 926 | ================================================================================ 927 | Set comprehensions 928 | ================================================================================ 929 | 930 | {a[b][c] for a, b, c in items} 931 | {r for s in qs for n in ms} 932 | 933 | -------------------------------------------------------------------------------- 934 | 935 | (module 936 | (expression_statement 937 | (set_comprehension 938 | (subscript 939 | (subscript 940 | (identifier) 941 | (identifier)) 942 | (identifier)) 943 | (for_in_clause 944 | (pattern_list 945 | (identifier) 946 | (identifier) 947 | (identifier)) 948 | (identifier)))) 949 | (expression_statement 950 | (set_comprehension 951 | (identifier) 952 | (for_in_clause 953 | (identifier) 954 | (identifier)) 955 | (for_in_clause 956 | (identifier) 957 | (identifier))))) 958 | 959 | ================================================================================ 960 | Simple Tuples 961 | ================================================================================ 962 | 963 | () 964 | (a, b) 965 | (a, b, c,) 966 | (print, exec) 967 | 968 | -------------------------------------------------------------------------------- 969 | 970 | (module 971 | (expression_statement 972 | (tuple)) 973 | (expression_statement 974 | (tuple 975 | (identifier) 976 | (identifier))) 977 | (expression_statement 978 | (tuple 979 | (identifier) 980 | (identifier) 981 | (identifier))) 982 | (expression_statement 983 | 
(tuple 984 | (identifier) 985 | (identifier)))) 986 | 987 | ================================================================================ 988 | Generator expression 989 | ================================================================================ 990 | 991 | (a[b][c] for a, b, c in items) 992 | dict((a, b) for a, b in d) 993 | (a for b in c for d in e,) 994 | (x for x in range(1, 10)) 995 | 996 | -------------------------------------------------------------------------------- 997 | 998 | (module 999 | (expression_statement 1000 | (generator_expression 1001 | (subscript 1002 | (subscript 1003 | (identifier) 1004 | (identifier)) 1005 | (identifier)) 1006 | (for_in_clause 1007 | (pattern_list 1008 | (identifier) 1009 | (identifier) 1010 | (identifier)) 1011 | (identifier)))) 1012 | (expression_statement 1013 | (call 1014 | (identifier) 1015 | (generator_expression 1016 | (tuple 1017 | (identifier) 1018 | (identifier)) 1019 | (for_in_clause 1020 | (pattern_list 1021 | (identifier) 1022 | (identifier)) 1023 | (identifier))))) 1024 | (expression_statement 1025 | (generator_expression 1026 | (identifier) 1027 | (for_in_clause 1028 | (identifier) 1029 | (identifier)) 1030 | (for_in_clause 1031 | (identifier) 1032 | (identifier)))) 1033 | (expression_statement 1034 | (generator_expression 1035 | (identifier) 1036 | (for_in_clause 1037 | (identifier) 1038 | (call 1039 | (identifier) 1040 | (argument_list 1041 | (integer) 1042 | (integer))))))) 1043 | -------------------------------------------------------------------------------- /test/highlight/keywords.py: -------------------------------------------------------------------------------- 1 | if foo(): 2 | # <- keyword 3 | pass 4 | # <- keyword 5 | elif bar(): 6 | # <- keyword 7 | pass 8 | else: 9 | # <- keyword 10 | foo 11 | 12 | return 13 | # ^ keyword 14 | raise e 15 | # ^ keyword 16 | 17 | for i in foo(): 18 | # <- keyword 19 | # ^ variable 20 | # ^ operator 21 | # ^ function 22 | continue 23 | # <- keyword 24 | 
break 25 | # <- keyword 26 | 27 | a and b or c 28 | # ^ operator 29 | # ^ variable 30 | # ^ operator 31 | -------------------------------------------------------------------------------- /test/highlight/parameters.py: -------------------------------------------------------------------------------- 1 | def g(h, i, /, j, *, k=100, **kwarg): 2 | # ^ operator 3 | # ^ operator 4 | pass 5 | -------------------------------------------------------------------------------- /test/highlight/pattern_matching.py: -------------------------------------------------------------------------------- 1 | match command.split(): 2 | # ^ keyword 3 | case ["quit"]: 4 | # ^ keyword 5 | print("Goodbye!") 6 | quit_game() 7 | case ["look"]: 8 | # ^ keyword 9 | current_room.describe() 10 | case ["get", obj]: 11 | # ^ keyword 12 | character.get(obj, current_room) 13 | case ["go", direction]: 14 | # ^ keyword 15 | current_room = current_room.neighbor(direction) 16 | # The rest of your commands go here 17 | 18 | match command.split(): 19 | # ^ keyword 20 | case ["drop", *objects]: 21 | # ^ keyword 22 | for obj in objects: 23 | character.drop(obj, current_room) 24 | 25 | match command.split(): 26 | # ^ keyword 27 | case ["quit"]: ... # Code omitted for brevity 28 | case ["go", direction]: pass 29 | case ["drop", *objects]: pass 30 | case _: 31 | print(f"Sorry, I couldn't understand {command!r}") 32 | 33 | match command.split(): 34 | # ^ keyword 35 | case ["north"] | ["go", "north"]: 36 | # ^ keyword 37 | current_room = current_room.neighbor("north") 38 | case ["get", obj] | ["pick", "up", obj] | ["pick", obj, "up"]: 39 | # ^ keyword 40 | pass 41 | 42 | match = 2 43 | # ^ variable 44 | match, a = 2, 3 45 | # ^ variable 46 | match: int = secret 47 | # ^ variable 48 | x, match: str = 2, "hey, what's up?" 
49 | # <- variable 50 | # ^ variable 51 | 52 | if match := re.fullmatch(r"(-)?(\d+:)?\d?\d:\d\d(\.\d*)?", time, flags=re.ASCII): 53 | # ^ variable 54 | return match 55 | -------------------------------------------------------------------------------- /test/tags/main.py: -------------------------------------------------------------------------------- 1 | class MyClass: 2 | # ^ definition.class 3 | def hello(): 4 | # ^ definition.function 5 | print "hello from MyClass" 6 | 7 | MyClass.hello() 8 | # ^ reference.call 9 | 10 | def main(): 11 | # ^ definition.function 12 | print "Hello, world!" 13 | 14 | main() 15 | # <- reference.call 16 | -------------------------------------------------------------------------------- /tree-sitter.json: -------------------------------------------------------------------------------- 1 | { 2 | "grammars": [ 3 | { 4 | "name": "python", 5 | "camelcase": "Python", 6 | "scope": "source.python", 7 | "path": ".", 8 | "file-types": [ 9 | "py" 10 | ], 11 | "highlights": "queries/highlights.scm", 12 | "tags": "queries/tags.scm", 13 | "injection-regex": "py" 14 | } 15 | ], 16 | "metadata": { 17 | "version": "0.23.6", 18 | "license": "MIT", 19 | "description": "Python grammar for tree-sitter", 20 | "authors": [ 21 | { 22 | "name": "Max Brunsfeld", 23 | "email": "maxbrunsfeld@gmail.com" 24 | }, 25 | { 26 | "name": "Amaan Qureshi", 27 | "email": "amaanq12@gmail.com" 28 | } 29 | ], 30 | "links": { 31 | "repository": "https://github.com/tree-sitter/tree-sitter-python" 32 | } 33 | }, 34 | "bindings": { 35 | "c": true, 36 | "go": true, 37 | "node": true, 38 | "python": true, 39 | "rust": true, 40 | "swift": true 41 | } 42 | } 43 | --------------------------------------------------------------------------------