├── code_indexer_loop
│   ├── __init__.py
│   ├── utils.py
│   ├── test_api_dummy_sql.sql.txt
│   ├── constants.py
│   ├── test_api.py
│   ├── api.py
│   ├── code_splitter.py
│   └── test_api_dummy_file.py.txt
├── Makefile
├── .github
│   └── workflows
│       ├── stale.yaml
│       ├── gitleaks_pr.yaml
│       └── gitleaks_push.yaml
├── pyproject.toml
├── examples
│   └── basic_usage.ipynb
├── .gitignore
├── README.md
└── LICENSE

/code_indexer_loop/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.1"
2 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: clean build publish
2 | 
3 | build:
4 | 	flit build
5 | 
6 | clean:
7 | 	rm -rf dist
8 | 	rm -rf build
9 | 
10 | publish: build
11 | 	flit publish
12 | 
13 | .PHONY: build clean publish all
14 | 
--------------------------------------------------------------------------------
/code_indexer_loop/utils.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | 
3 | 
4 | def hash_md5(filename):
5 |     md5 = hashlib.md5()
6 |     with open(filename, "rb") as f:
7 |         for chunk in iter(lambda: f.read(4096), b""):
8 |             md5.update(chunk)
9 |     return md5.hexdigest()
10 | 
--------------------------------------------------------------------------------
/code_indexer_loop/test_api_dummy_sql.sql.txt:
--------------------------------------------------------------------------------
1 | SELECT
2 |     Books.BookID,
3 |     Books.Title,
4 |     Authors.AuthorName,
5 |     Orders.OrderDate
6 | FROM
7 |     Books
8 | JOIN
9 |     Authors ON Books.AuthorID = Authors.AuthorID
10 | JOIN
11 |     Orders ON Books.BookID = Orders.BookID
12 | WHERE
13 |     Orders.OrderDate >= '2022-01-01'
14 |     AND Orders.OrderDate <= '2022-12-31'
15 |     AND Authors.AuthorName LIKE '%John%'
16 | ORDER BY
17 |     Orders.OrderDate DESC;
18 | 
--------------------------------------------------------------------------------
/code_indexer_loop/constants.py:
--------------------------------------------------------------------------------
1 | EXTENSION_TO_TREE_SITTER_LANGUAGE = {
2 |     ".c": "c",
3 |     ".cc": "cpp",
4 |     ".cpp": "cpp",
5 |     ".cs": "c-sharp",
6 |     ".cxx": "cpp",
7 |     ".go": "go",
8 |     ".hs": "haskell",
9 |     ".java": "java",
10 |     ".jl": "julia",
11 |     ".js": "javascript",
12 |     ".jsx": "javascript",
13 |     ".php": "php",
14 |     ".py": "python",
15 |     ".rb": "ruby",
16 |     ".rs": "rust",
17 |     ".scala": "scala",
18 |     ".sql": "sql",
19 |     ".swift": "swift",
20 |     ".ts": "typescript",
21 |     ".tsx": "typescript",
22 | }
23 | 
--------------------------------------------------------------------------------
/.github/workflows/stale.yaml:
--------------------------------------------------------------------------------
1 | #####################################
2 | # DO NOT EDIT DIRECTLY.             #
3 | # This file is managed by Terraform #
4 | #####################################
5 | 
6 | name: 'Close stale PRs'
7 | on:
8 |   schedule:
9 |     - cron: '30 1 * * *'
10 | 
11 | jobs:
12 |   stale:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/stale@v8
16 |         with:
17 |           stale-pr-message: 'This PR is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
18 |           close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.'
19 | days-before-pr-stale: 30 20 | days-before-pr-close: 7 21 | exempt-pr-labels: 'dependencies,security' 22 | -------------------------------------------------------------------------------- /.github/workflows/gitleaks_pr.yaml: -------------------------------------------------------------------------------- 1 | ##################################### 2 | # DO NOT EDIT DIRECTLY. # 3 | # This file is managed by Terraform # 4 | ##################################### 5 | 6 | on: [pull_request] 7 | 8 | jobs: 9 | gitleaks: 10 | runs-on: ubuntu-latest 11 | name: Detect Secrets 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | with: 16 | fetch-depth: 0 # Checkout full history to make .gitleaksignore work like it does locally 17 | 18 | - name: GitLeaks 19 | uses: gacts/gitleaks@v1 # Action page: 20 | 21 | - name: Add Failure Instructions to Pull Request 22 | if: ${{ failure() }} 23 | uses: thollander/actions-comment-pull-request@v1 # Action page: 24 | with: 25 | message: | 26 | :warning: A secret was detected :warning: 27 | Follow instructions in [Notion](https://www.notion.so/definitive-io/GitHub-Secret-Prevention-97986fd7ae9f45dd8703a1e42f7b07f8#027d1f9cd2544a0798505a1817dfe3df) to resolve. 28 | -------------------------------------------------------------------------------- /.github/workflows/gitleaks_push.yaml: -------------------------------------------------------------------------------- 1 | ##################################### 2 | # DO NOT EDIT DIRECTLY. # 3 | # This file is managed by Terraform # 4 | ##################################### 5 | 6 | on: [push] 7 | 8 | jobs: 9 | gitleaks: 10 | runs-on: ubuntu-latest 11 | name: Detect Secrets 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | with: 16 | fetch-depth: 0 # Checkout full history to make .gitleaksignore work like it does locally 17 | 18 | - name: GitLeaks 19 | uses: gacts/gitleaks@v1 # Action page: 20 | 21 | - name: Email security@definitive.io 22 | if: ${{ failure() && github.event.number == 0 }} # Only run for push events 23 | uses: licenseware/send-email-notification@v1 # Action page: 24 | with: 25 | api-key: ${{ secrets.SENDGRID_API_KEY }} 26 | subject: Secret detected in GitHub repository '${{ github.event.repository.name }}' 27 | from-email: GitLeaks GitHub Action 28 | to-email: security@definitive.io 29 | markdown-body: | 30 | Secret detected in ${{ github.event.repository.url }}. 
See details below:
31 | 
32 |             * action: ${{ github.event.repository.url }}/actions/runs/${{ github.run_id }}
33 |             * commit: ${{ github.event.head_commit.url }}
34 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | build-backend = "flit_core.buildapi"
3 | requires = ["flit_core >=3.8.0,<4"]
4 | 
5 | [project]
6 | name = "code-indexer-loop"
7 | description = "Code Indexer Loop"
8 | authors = [
9 |     {name = "Rick Lamers", email = "rick@definitive.io"}
10 | ]
11 | dynamic = ["version"]
12 | readme = "README.md"
13 | requires-python = ">=3.9"
14 | 
15 | dependencies = [
16 |     "llama-index>=0.9.14,<0.10",
17 |     "chromadb>=0.4.8,<0.5",
18 |     "tree-sitter-languages>=1.7.0,<1.8",
19 |     "tree-sitter>=0.20.2,<0.21",
20 |     "tiktoken>=0.4.0,<0.5",
21 |     "langchain>=0.0.354,<0.1.0",
22 |     "watchdog>=2.3.1,<2.4",
23 |     "nltk>=3.8.1,<3.9",
24 | ]
25 | 
26 | [project.optional-dependencies]
27 | dev = [
28 |     "toml ~=0.10.2",
29 |     "black ~=23.3.0",
30 |     "isort ~=5.9.3",
31 |     "autoflake ~=2.2.0",
32 |     "ruff ~=0.0.284",
33 |     "pytest ~=7.4.1",
34 |     "flit >=3.8.0,<4",
35 | ]
36 | test = [
37 |     "pytest-cov ~=3.0.0",
38 | ]
39 | 
40 | [tool.black]
41 | line-length = 120
42 | 
43 | [tool.ruff]
44 | # Enable the pycodestyle (`E`) and Pyflakes (`F`) rules by default.
45 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
46 | # McCabe complexity (`C901`) by default.
47 | select = ["E", "F"]
48 | ignore = []
49 | 
50 | # Allow autofix for all enabled rules (when `--fix` is provided).
51 | fixable = ["ALL"]
52 | unfixable = []
53 | 
54 | # Exclude a variety of commonly ignored directories.
55 | exclude = [
56 |     ".bzr",
57 |     ".direnv",
58 |     ".eggs",
59 |     ".git",
60 |     ".git-rewrite",
61 |     ".hg",
62 |     ".mypy_cache",
63 |     ".nox",
64 |     ".pants.d",
65 |     ".pytype",
66 |     ".ruff_cache",
67 |     ".svn",
68 |     ".tox",
69 |     ".venv",
70 |     "__pypackages__",
71 |     "_build",
72 |     "buck-out",
73 |     "build",
74 |     "dist",
75 |     "node_modules",
76 |     "venv",
77 | ]
78 | per-file-ignores = {}
79 | 
80 | # Same as Black.
81 | line-length = 120
82 | 
83 | # Allow unused variables when underscore-prefixed.
84 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 85 | 86 | # Assume Python 3.9 87 | target-version = "py39" -------------------------------------------------------------------------------- /examples/basic_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from code_indexer_loop.api import CodeIndexer" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "indexer = CodeIndexer(src_dir=os.environ[\"CIL_SRC_DIR\"], watch=True)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 4, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "6" 39 | ] 40 | }, 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "indexer.index.vector_store.client.count()" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 5, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "query = \"pandas\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 6, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "import os\n", 69 | "\n", 70 | "import pandas as pd\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "print(indexer.query(query)[0:30])" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 7, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "llama_index.schema.NodeWithScore" 87 | ] 88 | }, 89 | "execution_count": 7, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "text_nodes = indexer.query_nodes(query)\n", 96 | "\n", 97 | "type(text_nodes[0])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 8, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "dict_keys(['file', 'content'])" 109 | ] 110 | }, 111 | "execution_count": 8, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "files = indexer.query_documents(query)\n", 118 | "\n", 119 | "files[0].keys()" 120 | ] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": ".venv", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.9.2" 140 | }, 141 | "orig_nbformat": 4 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 2 145 | } 146 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | 
parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | .vscode/ -------------------------------------------------------------------------------- /code_indexer_loop/test_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from code_indexer_loop.code_splitter import (CodeSplitter, 6 | MaxChunkLengthExceededError, 7 | TokenCounter) 8 | 9 | THIS_FILE_DIR = os.path.dirname(os.path.realpath(__file__)) 10 | 11 | 12 | def create_code_splitter(language="python", target_chunk_tokens=5, max_chunk_tokens=200, enforce_max_chunk_tokens=True): 13 | return CodeSplitter( 14 | language=language, 15 | target_chunk_tokens=target_chunk_tokens, 16 | max_chunk_tokens=max_chunk_tokens, 17 | enforce_max_chunk_tokens=enforce_max_chunk_tokens, 18 | token_model="gpt-4", 19 | coalesce=50, 20 | ) 21 | 22 | 23 | def test_code_splitter_prefix_model(): 24 | CodeSplitter( 25 | language="python", 26 | target_chunk_tokens=10, 27 | max_chunk_tokens=10, 28 | enforce_max_chunk_tokens=True, 29 | token_model="gpt-4-32k-0613", 30 | coalesce=50, 31 | ) 32 | 33 | 34 | def test_code_splitter(): 35 | python_code_splitter = create_code_splitter() 36 | chunks = python_code_splitter.split_text( 37 | """def foo(): 38 | print("Hello, world!") 39 | 40 | print(1)""" 41 | ) 42 | assert chunks[0].startswith("def foo():") 43 | assert not chunks[0].endswith('")') 44 | 45 | 46 | def test_code_splitter_newlines(): 47 | python_code_splitter = create_code_splitter() 48 | chunks = python_code_splitter.split_text( 49 | """ 50 | def foo(): 51 | print("Hello, world!") 52 | 53 | print(1) 54 | 55 | """ 56 | ) 57 | assert chunks[0].startswith("\ndef foo():") 58 | assert not chunks[0].endswith('")') 59 | assert chunks[-1].endswith("\n\n") 60 | 61 | 62 | def test_code_splitter_raise(): 63 | python_code_splitter = create_code_splitter(max_chunk_tokens=5) 64 | with pytest.raises(MaxChunkLengthExceededError): 65 | python_code_splitter.split_text( 66 | """ 67 | def mostdefinitelynotlessthan5tokens(): 68 | pass 69 | """ 70 | ) 71 | 72 | 73 | def test_code_splitter_noraise(): 74 | python_code_splitter = create_code_splitter(max_chunk_tokens=5, enforce_max_chunk_tokens=False) 75 | python_code_splitter.split_text( 76 | """ 77 | def mostdefinitelynotlessthan5tokens(): 78 | pass 79 | """ 80 | ) 81 | 82 | 83 | def test_code_splitter_token_lengths(): 84 | tc = TokenCounter(default_model="gpt-4") 85 | max_chunk_tokens = 20 86 | python_code_splitter = create_code_splitter( 87 | max_chunk_tokens=max_chunk_tokens, target_chunk_tokens=max_chunk_tokens // 2 88 | ) 89 | source_code = """ 90 | def add(a, b): 91 | return a + b 92 | 93 | def subtract(a, b): 94 | return a - b 95 | 96 | add(1, 2) 97 | """ 98 | chunks = python_code_splitter.split_text(source_code) 99 | joined_chunks = "".join(chunks) 100 | assert source_code == joined_chunks 101 | 102 | chunk_lengths = [tc.count(chunk) for chunk in chunks] 103 | assert all([chunk_length <= max_chunk_tokens for chunk_length in chunk_lengths]) 104 | 105 | 106 | def test_long_file(): 107 | hard_file_path = os.path.join(THIS_FILE_DIR, "test_api_dummy_file.py.txt") 108 | with open(hard_file_path, "r") as f: 109 | source_code = f.read() 110 | 111 | python_code_splitter = create_code_splitter(target_chunk_tokens=1000, max_chunk_tokens=9000) 112 | chunks = python_code_splitter.split_text(source_code) 113 | joined_chunks = "".join(chunks) 114 | assert source_code == joined_chunks 
115 | 
116 | 
117 | def test_sql():
118 |     sql_file_path = os.path.join(THIS_FILE_DIR, "test_api_dummy_sql.sql.txt")
119 |     with open(sql_file_path, "r") as f:
120 |         source_code = f.read()
121 | 
122 |     sql_code_splitter = CodeSplitter(
123 |         language="sql",
124 |         target_chunk_tokens=10,
125 |         max_chunk_tokens=1000,
126 |         enforce_max_chunk_tokens=True,
127 |         token_model="gpt-4",
128 |         coalesce=50,
129 |     )
130 | 
131 |     chunks = sql_code_splitter.split_text(source_code)
132 |     joined_chunks = "".join(chunks)
133 |     assert source_code == joined_chunks
134 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Code Indexer Loop
2 | 
3 | [![PyPI version](https://badge.fury.io/py/code-indexer-loop.svg?v=2)](https://pypi.org/project/code-indexer-loop/)
4 | [![License](https://img.shields.io/github/license/definitive-io/code-indexer-loop?v=2)](LICENSE)
5 | [![Forks](https://img.shields.io/github/forks/definitive-io/code-indexer-loop?v=2)](https://github.com/definitive-io/code-indexer-loop/network)
6 | [![Stars](https://img.shields.io/github/stars/definitive-io/code-indexer-loop?v=2)](https://github.com/definitive-io/code-indexer-loop/stargazers)
7 | [![Twitter](https://img.shields.io/twitter/url/https/twitter.com?style=social&label=Follow%20%40DefinitiveIO)](https://twitter.com/definitiveio)
8 | [![Discord](https://dcbadge.vercel.app/api/server/CPJJfq87Vx?compact=true&style=flat)](https://discord.gg/CPJJfq87Vx)
9 | 
10 | 
11 | **Code Indexer Loop** is a Python library designed to index and retrieve code snippets.
12 | 
13 | It uses the indexing utilities of the **LlamaIndex** library and the multi-language **tree-sitter** library to parse code from many popular programming languages. **tiktoken** is used to right-size retrieval based on the number of tokens, and **LangChain** is used to obtain embeddings (defaults to **OpenAI**'s `text-embedding-ada-002`) and store them in an embedded **ChromaDB** vector database. **watchdog** is used to continuously update the index based on file system events.
14 | 
15 | Read the [launch blog post](https://www.definitive.io/blog/open-sourcing-code-indexer-loop) for more details about why we've built this!
16 | 
17 | ## Installation:
18 | Use `pip` to install Code Indexer Loop from PyPI.
19 | ```
20 | pip install code-indexer-loop
21 | ```
22 | 
23 | ## Usage:
24 | 1. Import the necessary modules:
25 | ```python
26 | from code_indexer_loop.api import CodeIndexer
27 | ```
28 | 2. Create a CodeIndexer object and have it watch for changes:
29 | ```python
30 | indexer = CodeIndexer(src_dir="path/to/code/", watch=True)
31 | ```
32 | 3. Use `.query` to perform a search query:
33 | ```python
34 | query = "pandas"
35 | print(indexer.query(query)[0:30])
36 | ```
37 | 
38 | Note: make sure the `OPENAI_API_KEY` environment variable is set. This is needed for generating the embeddings.
39 | 
40 | You can also use `indexer.query_nodes` to get the nodes of a query, or `indexer.query_documents` to retrieve the full source code files.
41 | 
42 | Note that if you edit any of the source code files in `src_dir`, they are efficiently re-indexed using `watchdog` and an `md5`-based caching mechanism. This results in up-to-date embeddings every time you query the index. See the sketch under "Change detection" below.
43 | 
44 | ## Examples
45 | Check out the [basic_usage](examples/basic_usage.ipynb) notebook for a quick overview of the API.
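46 | 
47 | ## Change detection
48 | Re-indexing is driven by content hashes: a file is only re-embedded when its md5 fingerprint changes. As a minimal sketch (the file path below is hypothetical):
49 | 
50 | ```python
51 | from code_indexer_loop.utils import hash_md5
52 | 
53 | # Unchanged files produce the same digest, so the indexer skips re-embedding them.
54 | print(hash_md5("path/to/code/example.py"))  # 32-character hex digest
55 | ```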
56 | 
57 | ## Token limits
58 | You can configure token limits for the chunks through the CodeIndexer constructor:
59 | 
60 | ```python
61 | indexer = CodeIndexer(
62 |     src_dir="path/to/code/", watch=True,
63 |     target_chunk_tokens=300,
64 |     max_chunk_tokens=1000,
65 |     enforce_max_chunk_tokens=False,
66 |     coalesce=50,
67 |     token_model="gpt-4",
68 | )
69 | ```
70 | 
71 | Note that you can choose whether `max_chunk_tokens` is enforced. If it is, an exception is raised whenever no semantic split can respect `max_chunk_tokens`.
72 | 
73 | The `coalesce` argument sets the token threshold below which smaller chunks are merged into a single chunk, to avoid producing many very small chunks. The unit for `coalesce` is also tokens.
74 | 
75 | ## tree-sitter
76 | Because `tree-sitter` is used for parsing, chunks are only broken at valid node-level positions in the source file. This avoids splitting up, e.g., function and class definitions.
77 | 
78 | ### Supported languages:
79 | C, C++, C#, Go, Haskell, Java, Julia, JavaScript, PHP, Python, Ruby, Rust, Scala, Swift, SQL, TypeScript
80 | 
81 | Note: we mainly test Python support. Use other languages at your own peril.
82 | 
83 | ## Contributing
84 | Pull requests are welcome. Please make sure to update tests as appropriate. Use the tools provided in the `dev` dependencies to maintain the code standard.
85 | 
86 | ### Tests
87 | Run the unit tests by invoking `pytest` in the root.
88 | 
89 | ## License
90 | Please see the LICENSE file provided with the source code.
91 | 
92 | ## Attribution
93 | We'd like to thank the Sweep AI team for publishing their ideas about code chunking. Read their blog posts about the topic [here](https://docs.sweep.dev/blogs/chunking-2m-files) and [here](https://docs.sweep.dev/blogs/chunking-improvements). The implementation in `code_indexer_loop` is modified from their original implementation, mainly to limit chunks based on tokens instead of characters and to achieve perfect document reconstruction (`"".join(chunks) == original_source_code`).
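94 | 
95 | As an illustration of that reconstruction property, here is a minimal sketch using `CodeSplitter` directly (parameter values are arbitrary):
96 | 
97 | ```python
98 | from code_indexer_loop.code_splitter import CodeSplitter
99 | 
100 | splitter = CodeSplitter(
101 |     language="python",
102 |     target_chunk_tokens=50,
103 |     max_chunk_tokens=200,
104 |     enforce_max_chunk_tokens=True,
105 |     coalesce=50,
106 |     token_model="gpt-4",
107 | )
108 | source = "def add(a, b):\n    return a + b\n"
109 | chunks = splitter.split_text(source)
110 | # Chunks concatenate back to the original source, byte for byte.
111 | assert "".join(chunks) == source
112 | ```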
113 | 
--------------------------------------------------------------------------------
/code_indexer_loop/api.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import os
3 | from pathlib import Path
4 | 
5 | import chromadb
6 | from langchain.embeddings.openai import OpenAIEmbeddings
7 | from llama_index import ServiceContext, VectorStoreIndex
8 | from llama_index.embeddings import LangchainEmbedding
9 | from llama_index.schema import NodeWithScore, TextNode
10 | from llama_index.vector_stores import ChromaVectorStore
11 | from watchdog.events import FileSystemEventHandler
12 | from watchdog.observers import Observer
13 | 
14 | from code_indexer_loop.code_splitter import CodeSplitter
15 | from code_indexer_loop.constants import EXTENSION_TO_TREE_SITTER_LANGUAGE
16 | from code_indexer_loop.utils import hash_md5
17 | 
18 | 
19 | class CodeIndexer:
20 |     src_dir: str
21 |     target_chunk_tokens: int
22 |     max_chunk_tokens: int
23 |     enforce_max_chunk_tokens: bool
24 |     token_model: str
25 |     index: VectorStoreIndex = None
26 | 
27 |     def __init__(
28 |         self,
29 |         src_dir: str,
30 |         target_chunk_tokens: int = 300,
31 |         max_chunk_tokens: int = 1000,
32 |         enforce_max_chunk_tokens: bool = False,
33 |         coalesce: int = 50,
34 |         token_model: str = "gpt-4",
35 |         watch: bool = False,
36 |     ):
37 |         self.src_dir = src_dir
38 |         self.target_chunk_tokens = target_chunk_tokens
39 |         self.max_chunk_tokens = max_chunk_tokens
40 |         self.enforce_max_chunk_tokens = enforce_max_chunk_tokens
41 |         self.coalesce = coalesce
42 |         self.token_model = token_model
43 |         # Per-instance state (mutable class attributes would be shared across instances)
44 |         self.code_splitters = {}
45 |         self.hash_cache = {}
46 |         self._create_index()
47 |         self.refresh_nodes()
48 | 
49 |         if watch:
50 |             self._start_watching()
51 |             atexit.register(self._stop_watching)
52 | 
53 |     def query(self, query: str, k=10) -> str:
54 |         return "\n".join(
55 |             [node_with_score.node.text for node_with_score in self.index.as_retriever(similarity_top_k=k).retrieve(query)]
56 |         )
57 | 
58 |     def query_nodes(self, query: str, k=10) -> list[NodeWithScore]:
59 |         return self.index.as_retriever(similarity_top_k=k).retrieve(query)
60 | 
61 |     def query_documents(self, query: str, k=10) -> list[dict[str, str]]:
62 |         nodes = self.index.as_retriever(similarity_top_k=k).retrieve(query)
63 |         files = [node_with_score.node.metadata["file"] for node_with_score in nodes]
64 |         # Deduplicate files, preserving order
65 |         files = list(dict.fromkeys(files))
66 |         # Read file contents
67 |         contents = []
68 |         for file in files:
69 |             with open(file, "r") as f:
70 |                 contents.append(
71 |                     {
72 |                         "file": file,
73 |                         "content": f.read(),
74 |                     }
75 |                 )
76 |         return contents
77 | 
78 |     def add_file(self, file: str):
79 |         ext = os.path.splitext(file)[1]
80 |         text_splitter = self._get_code_splitter(ext)
81 | 
82 |         calculated_hash = hash_md5(file)
83 |         if self.hash_cache.get(file) == calculated_hash:
84 |             # Skip file if it hasn't changed
85 |             return
86 |         self.hash_cache[file] = calculated_hash
87 | 
88 |         with open(file, "r") as f:
89 |             text = f.read()
90 |             nodes = [
91 |                 TextNode(
92 |                     text=chunk,
93 |                     metadata={
94 |                         "file": file,
95 |                     },
96 |                 )
97 |                 for chunk in text_splitter.split_text(text)
98 |             ]
99 | 
100 |         self._remove_old_nodes(file)
101 |         self._insert_nodes(nodes)
102 | 
103 |     def remove_file(self, file: str):
104 |         self._remove_old_nodes(file)
105 |         self.hash_cache.pop(file, None)
106 | 
107 |     def refresh_nodes(self):
108 |         files = self._find_files(self.src_dir, EXTENSION_TO_TREE_SITTER_LANGUAGE)
109 |         file_paths = {str(file) for file in files}  # hash_cache keys are strings
110 | 
111 |         # Clear any files that no longer exist
112 |         for file in list(self.hash_cache.keys()):
113 |             if file not in file_paths:
114 |                 del self.hash_cache[file]
115 |                 self._remove_old_nodes(file)
116 | 
117 |         # For each file, split into chunks and index
118 |         for file in files:
119 |             self.add_file(str(file))
120 | 
121 |     def _start_watching(self):
122 |         event_handler = CodeChangeHandler(self)
123 |         self.observer = Observer()
124 |         self.observer.schedule(event_handler, self.src_dir, recursive=True)
125 |         self.observer.start()
126 | 
127 |     def _stop_watching(self):
128 |         if hasattr(self, "observer"):
129 |             self.observer.stop()
130 |             self.observer.join()
131 | 
132 |     def _find_files(self, path, include_ext={}):
133 |         """
134 |         Recursively find all files in a given path.
135 | 
136 |         Parameters:
137 |         path (str): The root directory to start searching from.
138 |         include_ext (dict): A dictionary of file extensions to include
139 |         (keys are extensions including leading period if applicable).
140 | 
141 |         Returns:
142 |         set: A set of resolved paths for each file found.
143 |         """
144 |         found_files = []
145 | 
146 |         for root, _, files in os.walk(path):
147 |             for file in files:
148 |                 # Include the file only if its extension is supported
149 |                 file_ext = os.path.splitext(file)[1]
150 |                 if file_ext in include_ext:
151 |                     # Construct the full path of the file and append to list
152 |                     full_path = Path(os.path.join(root, file)).resolve()
153 |                     found_files.append(full_path)
154 | 
155 |         return set(found_files)
156 | 
157 |     def _get_code_splitter(self, ext) -> CodeSplitter:
158 |         if ext not in EXTENSION_TO_TREE_SITTER_LANGUAGE:
159 |             raise ValueError(f"Extension {ext} not supported.")
160 |         language = EXTENSION_TO_TREE_SITTER_LANGUAGE[ext]
161 |         if language not in self.code_splitters:
162 |             text_splitter = CodeSplitter(
163 |                 language=language,
164 |                 target_chunk_tokens=self.target_chunk_tokens,
165 |                 max_chunk_tokens=self.max_chunk_tokens,
166 |                 enforce_max_chunk_tokens=self.enforce_max_chunk_tokens,
167 |                 coalesce=self.coalesce,
168 |                 token_model=self.token_model,
169 |             )
170 |             self.code_splitters[ext] = text_splitter
171 | 
172 |         return self.code_splitters[ext]
173 | 
174 |     def _remove_old_nodes(self, file):
175 |         # Remove existing nodes for the same file
176 |         self.index.vector_store.client.delete(where={"file": file})
177 | 
178 |     def _insert_nodes(self, nodes):
179 |         self.index.insert_nodes(nodes)
180 | 
181 |     def _create_index(self) -> VectorStoreIndex:
182 |         # Create client and a new collection
183 |         chroma_client = chromadb.EphemeralClient()
184 |         chroma_collection = chroma_client.create_collection("code-index")
185 | 
186 |         # Define embedding function
187 |         embed_model = LangchainEmbedding(OpenAIEmbeddings())
188 | 
189 |         # Set up ChromaVectorStore and load in data
190 |         vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
191 |         service_context = ServiceContext.from_defaults(embed_model=embed_model)
192 |         index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
193 | 
194 |         self.index = index
195 |         return index
196 | 
197 | 
198 | class CodeChangeHandler(FileSystemEventHandler):
199 |     def __init__(self, indexer: CodeIndexer):
200 |         self.indexer = indexer
201 | 
202 |     def on_modified(self, event):
203 |         if event.is_directory:
204 |             # Directory modifications shouldn't trigger a reindex
205 |             return
206 |         else:
207 |             # Update only if the changed file has a supported extension
208 |             ext = os.path.splitext(event.src_path)[1]
209 |             if ext in
EXTENSION_TO_TREE_SITTER_LANGUAGE: 210 | self.indexer.add_file(event.src_path) 211 | 212 | def on_created(self, event): 213 | if event.is_directory: 214 | self.indexer.refresh_nodes() 215 | else: 216 | # Update only if the changed file has a supported extension 217 | ext = os.path.splitext(event.src_path)[1] 218 | if ext in EXTENSION_TO_TREE_SITTER_LANGUAGE: 219 | self.indexer.add_file(event.src_path) 220 | 221 | def on_moved(self, event): 222 | self.indexer.refresh_nodes() 223 | 224 | def on_deleted(self, event): 225 | if event.is_directory: 226 | self.indexer.refresh_nodes() 227 | else: 228 | ext = os.path.splitext(event.src_path)[1] 229 | if ext in EXTENSION_TO_TREE_SITTER_LANGUAGE: 230 | self.indexer.remove_file(event.src_path) 231 | -------------------------------------------------------------------------------- /code_indexer_loop/code_splitter.py: -------------------------------------------------------------------------------- 1 | """Code Splitter. 2 | 3 | Implementation amalgamated from: 4 | https://docs.sweep.dev/blogs/chunking-improvements 5 | https://docs.sweep.dev/blogs/chunking-2m-files 6 | https://github.com/jerryjliu/llama_index/pull/7100 7 | 8 | """ 9 | 10 | import re 11 | from dataclasses import dataclass 12 | from typing import List, Optional, Union 13 | 14 | import tiktoken 15 | from tree_sitter import Node 16 | 17 | 18 | class MaxChunkLengthExceededError(Exception): 19 | pass 20 | 21 | 22 | @dataclass 23 | class Span: 24 | # Represents a slice of a string 25 | start: int = 0 26 | end: int = 0 27 | 28 | def __post_init__(self): 29 | # If end is None, set it to start 30 | if self.end is None: 31 | self.end = self.start 32 | 33 | def extract(self, s: bytes) -> bytes: 34 | # Grab the corresponding substring of string s by bytes 35 | return s[self.start : self.end] 36 | 37 | def extract_lines(self, s: str) -> str: 38 | lines = s.split("\n") 39 | selected_lines = lines[self.start : self.end] 40 | joined = "\n".join(selected_lines) 41 | # if selection doesn't extend to the last line, add the missing newline 42 | if self.end < len(lines): 43 | joined += "\n" 44 | return joined 45 | 46 | def __add__(self, other: Union["Span", int]) -> "Span": 47 | # e.g. Span(1, 2) + Span(2, 4) = Span(1, 4) (concatenation) 48 | # There are no safety checks: Span(a, b) + Span(c, d) = Span(a, d) 49 | # and there are no requirements for b = c. 50 | if isinstance(other, int): 51 | return Span(self.start + other, self.end + other) 52 | elif isinstance(other, Span): 53 | return Span(self.start, other.end) 54 | else: 55 | raise NotImplementedError() 56 | 57 | def __len__(self) -> int: 58 | # i.e. 
Span(a, b) = b - a
59 |         return self.end - self.start
60 | 
61 | 
62 | class TokenCounter:
63 |     default_model: str
64 |     initialized_models = {}
65 | 
66 |     def __init__(self, default_model: str):
67 |         self.default_model = default_model
68 | 
69 |     def count(self, text: str, model: Optional[str] = None):
70 |         if model is None:
71 |             model = self.default_model
72 | 
73 |         if model not in self.initialized_models:
74 |             try:
75 |                 self.initialized_models[model] = tiktoken.encoding_for_model(model)
76 |             except KeyError:
77 |                 raise KeyError(f"Model {model} not supported.")
78 | 
79 |         return len(self.initialized_models[model].encode(text, disallowed_special=()))
80 | 
81 |     def count_chunk(self, chunk: Span, source_code: bytes, model: Optional[str] = None):
82 |         return self.count(chunk.extract(source_code).decode("utf-8"), model)
83 | 
84 | 
85 | class CodeSplitter:
86 |     """Split code using an AST parser."""
87 | 
88 |     language: str
89 |     target_chunk_tokens: int
90 |     max_chunk_tokens: int
91 |     enforce_max_chunk_tokens: bool
92 |     coalesce: int
93 |     token_counter: TokenCounter
94 | 
95 |     def __init__(
96 |         self,
97 |         language: str,
98 |         target_chunk_tokens: int,
99 |         max_chunk_tokens: int,
100 |         enforce_max_chunk_tokens: bool,
101 |         coalesce: int,
102 |         token_model: str,
103 |     ):
104 |         self.token_counter = TokenCounter(default_model=token_model)
105 |         self.target_chunk_tokens = target_chunk_tokens
106 |         self.max_chunk_tokens = max_chunk_tokens
107 |         self.enforce_max_chunk_tokens = enforce_max_chunk_tokens
108 |         self.language = language
109 |         self.coalesce = coalesce
110 | 
111 |     @classmethod
112 |     def class_name(cls) -> str:
113 |         """Get class name."""
114 |         return "CodeSplitter"
115 | 
116 |     def chunk_tree(
117 |         self,
118 |         tree,
119 |         source_code: bytes,
120 |     ) -> list[Span]:
121 |         # 1. Recursively form chunks
122 |         def chunk_node(node: Node) -> list[Span]:
123 |             chunks: list[Span] = []
124 |             current_chunk: Span = Span(node.start_byte, node.start_byte)
125 |             node_children = node.children
126 |             for child in node_children:
127 |                 child_token_len = self.token_counter.count_chunk(Span(child.start_byte, child.end_byte), source_code)
128 |                 child_and_current_token_len = child_token_len + self.token_counter.count_chunk(
129 |                     current_chunk, source_code
130 |                 )
131 | 
132 |                 if child_token_len > self.target_chunk_tokens:
133 |                     if child_token_len > self.max_chunk_tokens and self.enforce_max_chunk_tokens:
134 |                         raise MaxChunkLengthExceededError(
135 |                             f"Chunk token length {child_token_len} exceeds maximum {self.max_chunk_tokens}."
136 |                         )
137 | 
138 |                     chunks.append(current_chunk)
139 |                     current_chunk = Span(child.end_byte, child.end_byte)
140 |                     chunks.extend(chunk_node(child))
141 |                 elif child_and_current_token_len > self.target_chunk_tokens:
142 |                     if child_and_current_token_len > self.max_chunk_tokens and self.enforce_max_chunk_tokens:
143 |                         raise MaxChunkLengthExceededError(
144 |                             f"Chunk token length {child_and_current_token_len}"
145 |                             f" exceeds maximum {self.max_chunk_tokens}."
146 | ) 147 | chunks.append(current_chunk) 148 | current_chunk = Span(child.start_byte, child.end_byte) 149 | else: 150 | current_chunk += Span(child.start_byte, child.end_byte) 151 | 152 | final_chunk_token_len = self.token_counter.count_chunk(current_chunk, source_code) 153 | if final_chunk_token_len > self.max_chunk_tokens and self.enforce_max_chunk_tokens: 154 | raise MaxChunkLengthExceededError( 155 | f"Chunk token length {final_chunk_token_len} exceeds maximum {self.max_chunk_tokens}." 156 | ) 157 | chunks.append(current_chunk) 158 | return chunks 159 | 160 | chunks = chunk_node(tree.root_node) 161 | 162 | # Filter empty chunks 163 | chunks = [chunk for chunk in chunks if len(chunk) > 0] 164 | 165 | # Early return if there is no chunk 166 | if len(chunks) == 0: 167 | return [] 168 | # Early return if there is only one chunk 169 | if len(chunks) < 2: 170 | return [Span(0, len(chunks[0]))] 171 | 172 | # Filling in the gaps 173 | # by aligning end of one chunk with start of next 174 | chunks[0].start = 0 175 | for prev, curr in zip(chunks[:-1], chunks[1:]): 176 | prev.end = curr.start 177 | curr.end = len(source_code) 178 | 179 | # Combining small chunks with bigger ones 180 | new_chunks = [] 181 | aggregated_chunk = Span(0, 0) 182 | aggregated_chunk_token_len = 0 183 | for chunk in chunks: 184 | # Check if the combined chunk exceeds target_chunk_tokens 185 | # Note, at this point no chunk exceeds max_chunk_tokens 186 | # if max_chunk_tokens is enforced. 187 | chunk_token_len = self.token_counter.count_chunk(chunk, source_code) 188 | if chunk_token_len > self.target_chunk_tokens: 189 | new_chunks.append(aggregated_chunk) 190 | new_chunks.append(chunk) 191 | aggregated_chunk = Span(chunk.end, chunk.end) 192 | aggregated_chunk_token_len = 0 193 | elif aggregated_chunk_token_len + chunk_token_len > self.target_chunk_tokens: 194 | new_chunks.append(aggregated_chunk) 195 | aggregated_chunk = Span(chunk.start, chunk.end) 196 | aggregated_chunk_token_len = chunk_token_len 197 | else: 198 | # Combined chunk does not exceed target_chunk_tokens 199 | # so we add the current chunk to the aggregated_chunk. 200 | # Note, there is no need to check whether the combined chunk 201 | # exceeds max_chunk_tokens because we have already checked. 
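202 |                 # Once the running aggregate exceeds the `coalesce` token
203 |                 # threshold it is flushed below, so runs of tiny chunks are
204 |                 # merged into a single chunk instead of being emitted one by one.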
205 |                 aggregated_chunk += chunk
206 |                 aggregated_chunk_token_len += chunk_token_len
207 |                 if aggregated_chunk_token_len > self.coalesce:
208 |                     new_chunks.append(aggregated_chunk)
209 |                     aggregated_chunk = Span(chunk.end, chunk.end)
210 |                     aggregated_chunk_token_len = 0
211 | 
212 |         if len(aggregated_chunk) > 0:
213 |             new_chunks.append(aggregated_chunk)
214 | 
215 |         # Convert byte-offset spans to line-number spans
216 |         line_chunks = [
217 |             Span(
218 |                 self.get_line_number(chunk.start, source_code),
219 |                 self.get_line_number(chunk.end, source_code),
220 |             )
221 |             for chunk in new_chunks
222 |         ]
223 | 
224 |         # Eliminating empty chunks
225 |         line_chunks = [chunk for chunk in line_chunks if len(chunk) > 0]
226 |         return line_chunks
227 | 
228 |     def split_and_keep_newline(self, byte_str):
229 |         return re.split(b"(?<=\n)", byte_str)
230 | 
231 |     def get_line_number(self, index: int, source_code: bytes) -> int:
232 |         total_chars = 0
233 |         for line_number, line in enumerate(self.split_and_keep_newline(source_code), start=1):
234 |             total_chars += len(line)
235 |             if total_chars > index:
236 |                 return line_number - 1
237 |         return line_number
238 | 
239 |     def split_text(self, text: str) -> List[str]:
240 |         """Split incoming code and return chunks using the AST."""
241 |         try:
242 |             import tree_sitter_languages
243 |         except ImportError:
244 |             raise ImportError("Please install tree_sitter_languages to use CodeSplitter.")
245 | 
246 |         try:
247 |             parser = tree_sitter_languages.get_parser(self.language)
248 |         except Exception as e:
249 |             print(
250 |                 f"Could not get parser for language {self.language}. Check "
251 |                 "https://github.com/grantjenks/py-tree-sitter-languages#license "
252 |                 "for a list of valid languages."
253 |             )
254 |             raise e
255 | 
256 |         tree = parser.parse(text.encode("utf-8"))
257 |         if not tree.root_node.children or tree.root_node.children[0].type != "ERROR":
258 |             line_spans = self.chunk_tree(tree, text.encode("utf-8"))
259 |             chunks = [line_span.extract_lines(text) for line_span in line_spans]
260 |             return chunks
261 |         else:
262 |             raise ValueError(f"Could not parse code with language {self.language}.")
263 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | 
2 |                                  Apache License
3 |                            Version 2.0, January 2004
4 |                         http://www.apache.org/licenses/
5 | 
6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 | 
8 |    1. Definitions.
9 | 
10 |       "License" shall mean the terms and conditions for use, reproduction,
11 |       and distribution as defined by Sections 1 through 9 of this document.
12 | 
13 |       "Licensor" shall mean the copyright owner or entity authorized by
14 |       the copyright owner that is granting the License.
15 | 
16 |       "Legal Entity" shall mean the union of the acting entity and all
17 |       other entities that control, are controlled by, or are under common
18 |       control with that entity. For the purposes of this definition,
19 |       "control" means (i) the power, direct or indirect, to cause the
20 |       direction or management of such entity, whether by contract or
21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 |       outstanding shares, or (iii) beneficial ownership of such entity.
23 | 
24 |       "You" (or "Your") shall mean an individual or Legal Entity
25 |       exercising permissions granted by this License.
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2023 Definitive Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
-------------------------------------------------------------------------------- /code_indexer_loop/test_api_dummy_file.py.txt: -------------------------------------------------------------------------------- 1 | """ 2 | Sourced from: cpython/Lib/ast.py (https://github.com/python/cpython) 3 | 4 | ast 5 | ~~~ 6 | 7 | The `ast` module helps Python applications to process trees of the Python 8 | abstract syntax grammar. The abstract syntax itself might change with 9 | each Python release; this module helps to find out programmatically what 10 | the current grammar looks like and allows modifications of it. 11 | 12 | An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as 13 | a flag to the `compile()` builtin function or by using the `parse()` 14 | function from this module. The result will be a tree of objects whose 15 | classes all inherit from `ast.AST`. 16 | 17 | A modified abstract syntax tree can be compiled into a Python code object 18 | using the built-in `compile()` function. 19 | 20 | Additionally various helper functions are provided that make working with 21 | the trees simpler. The main intention of the helper functions and this 22 | module in general is to provide an easy to use interface for libraries 23 | that work tightly with the python syntax (template engines for example). 24 | 25 | 26 | :copyright: Copyright 2008 by Armin Ronacher. 27 | :license: Python License. 28 | """ 29 | import sys 30 | import re 31 | from _ast import * 32 | from contextlib import contextmanager, nullcontext 33 | from enum import IntEnum, auto, _simple_enum 34 | 35 | 36 | def parse(source, filename='', mode='exec', *, 37 | type_comments=False, feature_version=None, optimize=-1): 38 | """ 39 | Parse the source into an AST node. 40 | Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). 41 | Pass type_comments=True to get back type comments where the syntax allows. 42 | """ 43 | flags = PyCF_ONLY_AST 44 | if optimize > 0: 45 | flags |= PyCF_OPTIMIZED_AST 46 | if type_comments: 47 | flags |= PyCF_TYPE_COMMENTS 48 | if feature_version is None: 49 | feature_version = -1 50 | elif isinstance(feature_version, tuple): 51 | major, minor = feature_version # Should be a 2-tuple. 52 | if major != 3: 53 | raise ValueError(f"Unsupported major version: {major}") 54 | feature_version = minor 55 | # Else it should be an int giving the minor version for 3.x. 56 | return compile(source, filename, mode, flags, 57 | _feature_version=feature_version, optimize=optimize) 58 | 59 | 60 | def literal_eval(node_or_string): 61 | """ 62 | Evaluate an expression node or a string containing only a Python 63 | expression. The string or node provided may only consist of the following 64 | Python literal structures: strings, bytes, numbers, tuples, lists, dicts, 65 | sets, booleans, and None. 66 | 67 | Caution: A complex expression can overflow the C stack and cause a crash. 
68 | """ 69 | if isinstance(node_or_string, str): 70 | node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval') 71 | if isinstance(node_or_string, Expression): 72 | node_or_string = node_or_string.body 73 | def _raise_malformed_node(node): 74 | msg = "malformed node or string" 75 | if lno := getattr(node, 'lineno', None): 76 | msg += f' on line {lno}' 77 | raise ValueError(msg + f': {node!r}') 78 | def _convert_num(node): 79 | if not isinstance(node, Constant) or type(node.value) not in (int, float, complex): 80 | _raise_malformed_node(node) 81 | return node.value 82 | def _convert_signed_num(node): 83 | if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)): 84 | operand = _convert_num(node.operand) 85 | if isinstance(node.op, UAdd): 86 | return + operand 87 | else: 88 | return - operand 89 | return _convert_num(node) 90 | def _convert(node): 91 | if isinstance(node, Constant): 92 | return node.value 93 | elif isinstance(node, Tuple): 94 | return tuple(map(_convert, node.elts)) 95 | elif isinstance(node, List): 96 | return list(map(_convert, node.elts)) 97 | elif isinstance(node, Set): 98 | return set(map(_convert, node.elts)) 99 | elif (isinstance(node, Call) and isinstance(node.func, Name) and 100 | node.func.id == 'set' and node.args == node.keywords == []): 101 | return set() 102 | elif isinstance(node, Dict): 103 | if len(node.keys) != len(node.values): 104 | _raise_malformed_node(node) 105 | return dict(zip(map(_convert, node.keys), 106 | map(_convert, node.values))) 107 | elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)): 108 | left = _convert_signed_num(node.left) 109 | right = _convert_num(node.right) 110 | if isinstance(left, (int, float)) and isinstance(right, complex): 111 | if isinstance(node.op, Add): 112 | return left + right 113 | else: 114 | return left - right 115 | return _convert_signed_num(node) 116 | return _convert(node_or_string) 117 | 118 | 119 | def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): 120 | """ 121 | Return a formatted dump of the tree in node. This is mainly useful for 122 | debugging purposes. If annotate_fields is true (by default), 123 | the returned string will show the names and the values for fields. 124 | If annotate_fields is false, the result string will be more compact by 125 | omitting unambiguous field names. Attributes such as line 126 | numbers and column offsets are not dumped by default. If this is wanted, 127 | include_attributes can be set to true. If indent is a non-negative 128 | integer or string, then the tree will be pretty-printed with that indent 129 | level. None (the default) selects the single line representation. 130 | """ 131 | def _format(node, level=0): 132 | if indent is not None: 133 | level += 1 134 | prefix = '\n' + indent * level 135 | sep = ',\n' + indent * level 136 | else: 137 | prefix = '' 138 | sep = ', ' 139 | if isinstance(node, AST): 140 | cls = type(node) 141 | args = [] 142 | allsimple = True 143 | keywords = annotate_fields 144 | for name in node._fields: 145 | try: 146 | value = getattr(node, name) 147 | except AttributeError: 148 | keywords = True 149 | continue 150 | if value is None and getattr(cls, name, ...) 
is None: 151 | keywords = True 152 | continue 153 | value, simple = _format(value, level) 154 | allsimple = allsimple and simple 155 | if keywords: 156 | args.append('%s=%s' % (name, value)) 157 | else: 158 | args.append(value) 159 | if include_attributes and node._attributes: 160 | for name in node._attributes: 161 | try: 162 | value = getattr(node, name) 163 | except AttributeError: 164 | continue 165 | if value is None and getattr(cls, name, ...) is None: 166 | continue 167 | value, simple = _format(value, level) 168 | allsimple = allsimple and simple 169 | args.append('%s=%s' % (name, value)) 170 | if allsimple and len(args) <= 3: 171 | return '%s(%s)' % (node.__class__.__name__, ', '.join(args)), not args 172 | return '%s(%s%s)' % (node.__class__.__name__, prefix, sep.join(args)), False 173 | elif isinstance(node, list): 174 | if not node: 175 | return '[]', True 176 | return '[%s%s]' % (prefix, sep.join(_format(x, level)[0] for x in node)), False 177 | return repr(node), True 178 | 179 | if not isinstance(node, AST): 180 | raise TypeError('expected AST, got %r' % node.__class__.__name__) 181 | if indent is not None and not isinstance(indent, str): 182 | indent = ' ' * indent 183 | return _format(node)[0] 184 | 185 | 186 | def copy_location(new_node, old_node): 187 | """ 188 | Copy source location (`lineno`, `col_offset`, `end_lineno`, and `end_col_offset` 189 | attributes) from *old_node* to *new_node* if possible, and return *new_node*. 190 | """ 191 | for attr in 'lineno', 'col_offset', 'end_lineno', 'end_col_offset': 192 | if attr in old_node._attributes and attr in new_node._attributes: 193 | value = getattr(old_node, attr, None) 194 | # end_lineno and end_col_offset are optional attributes, and they 195 | # should be copied whether the value is None or not. 196 | if value is not None or ( 197 | hasattr(old_node, attr) and attr.startswith("end_") 198 | ): 199 | setattr(new_node, attr, value) 200 | return new_node 201 | 202 | 203 | def fix_missing_locations(node): 204 | """ 205 | When you compile a node tree with compile(), the compiler expects lineno and 206 | col_offset attributes for every node that supports them. This is rather 207 | tedious to fill in for generated nodes, so this helper adds these attributes 208 | recursively where not already set, by setting them to the values of the 209 | parent node. It works recursively starting at *node*. 210 | """ 211 | def _fix(node, lineno, col_offset, end_lineno, end_col_offset): 212 | if 'lineno' in node._attributes: 213 | if not hasattr(node, 'lineno'): 214 | node.lineno = lineno 215 | else: 216 | lineno = node.lineno 217 | if 'end_lineno' in node._attributes: 218 | if getattr(node, 'end_lineno', None) is None: 219 | node.end_lineno = end_lineno 220 | else: 221 | end_lineno = node.end_lineno 222 | if 'col_offset' in node._attributes: 223 | if not hasattr(node, 'col_offset'): 224 | node.col_offset = col_offset 225 | else: 226 | col_offset = node.col_offset 227 | if 'end_col_offset' in node._attributes: 228 | if getattr(node, 'end_col_offset', None) is None: 229 | node.end_col_offset = end_col_offset 230 | else: 231 | end_col_offset = node.end_col_offset 232 | for child in iter_child_nodes(node): 233 | _fix(child, lineno, col_offset, end_lineno, end_col_offset) 234 | _fix(node, 1, 0, 1, 0) 235 | return node 236 | 237 | 238 | def increment_lineno(node, n=1): 239 | """ 240 | Increment the line number and end line number of each node in the tree 241 | starting at *node* by *n*. 
This is useful to "move code" to a different 242 | location in a file. 243 | """ 244 | for child in walk(node): 245 | # TypeIgnore is a special case where lineno is not an attribute 246 | # but rather a field of the node itself. 247 | if isinstance(child, TypeIgnore): 248 | child.lineno = getattr(child, 'lineno', 0) + n 249 | continue 250 | 251 | if 'lineno' in child._attributes: 252 | child.lineno = getattr(child, 'lineno', 0) + n 253 | if ( 254 | "end_lineno" in child._attributes 255 | and (end_lineno := getattr(child, "end_lineno", 0)) is not None 256 | ): 257 | child.end_lineno = end_lineno + n 258 | return node 259 | 260 | 261 | def iter_fields(node): 262 | """ 263 | Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields`` 264 | that is present on *node*. 265 | """ 266 | for field in node._fields: 267 | try: 268 | yield field, getattr(node, field) 269 | except AttributeError: 270 | pass 271 | 272 | 273 | def iter_child_nodes(node): 274 | """ 275 | Yield all direct child nodes of *node*, that is, all fields that are nodes 276 | and all items of fields that are lists of nodes. 277 | """ 278 | for name, field in iter_fields(node): 279 | if isinstance(field, AST): 280 | yield field 281 | elif isinstance(field, list): 282 | for item in field: 283 | if isinstance(item, AST): 284 | yield item 285 | 286 | 287 | def get_docstring(node, clean=True): 288 | """ 289 | Return the docstring for the given node or None if no docstring can 290 | be found. If the node provided does not have docstrings a TypeError 291 | will be raised. 292 | 293 | If *clean* is `True`, all tabs are expanded to spaces and any whitespace 294 | that can be uniformly removed from the second line onwards is removed. 295 | """ 296 | if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)): 297 | raise TypeError("%r can't have docstrings" % node.__class__.__name__) 298 | if not(node.body and isinstance(node.body[0], Expr)): 299 | return None 300 | node = node.body[0].value 301 | if isinstance(node, Constant) and isinstance(node.value, str): 302 | text = node.value 303 | else: 304 | return None 305 | if clean: 306 | import inspect 307 | text = inspect.cleandoc(text) 308 | return text 309 | 310 | 311 | _line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") 312 | def _splitlines_no_ff(source, maxlines=None): 313 | """Split a string into lines ignoring form feed and other chars. 314 | 315 | This mimics how the Python parser splits source code. 316 | """ 317 | lines = [] 318 | for lineno, match in enumerate(_line_pattern.finditer(source), 1): 319 | if maxlines is not None and lineno > maxlines: 320 | break 321 | lines.append(match[0]) 322 | return lines 323 | 324 | 325 | def _pad_whitespace(source): 326 | r"""Replace all chars except '\f\t' in a line with spaces.""" 327 | result = '' 328 | for c in source: 329 | if c in '\f\t': 330 | result += c 331 | else: 332 | result += ' ' 333 | return result 334 | 335 | 336 | def get_source_segment(source, node, *, padded=False): 337 | """Get source code segment of the *source* that generated *node*. 338 | 339 | If some location information (`lineno`, `end_lineno`, `col_offset`, 340 | or `end_col_offset`) is missing, return None. 341 | 342 | If *padded* is `True`, the first line of a multi-line statement will 343 | be padded with spaces to match its original position. 
344 | """ 345 | try: 346 | if node.end_lineno is None or node.end_col_offset is None: 347 | return None 348 | lineno = node.lineno - 1 349 | end_lineno = node.end_lineno - 1 350 | col_offset = node.col_offset 351 | end_col_offset = node.end_col_offset 352 | except AttributeError: 353 | return None 354 | 355 | lines = _splitlines_no_ff(source, maxlines=end_lineno+1) 356 | if end_lineno == lineno: 357 | return lines[lineno].encode()[col_offset:end_col_offset].decode() 358 | 359 | if padded: 360 | padding = _pad_whitespace(lines[lineno].encode()[:col_offset].decode()) 361 | else: 362 | padding = '' 363 | 364 | first = padding + lines[lineno].encode()[col_offset:].decode() 365 | last = lines[end_lineno].encode()[:end_col_offset].decode() 366 | lines = lines[lineno+1:end_lineno] 367 | 368 | lines.insert(0, first) 369 | lines.append(last) 370 | return ''.join(lines) 371 | 372 | 373 | def walk(node): 374 | """ 375 | Recursively yield all descendant nodes in the tree starting at *node* 376 | (including *node* itself), in no specified order. This is useful if you 377 | only want to modify nodes in place and don't care about the context. 378 | """ 379 | from collections import deque 380 | todo = deque([node]) 381 | while todo: 382 | node = todo.popleft() 383 | todo.extend(iter_child_nodes(node)) 384 | yield node 385 | 386 | 387 | class NodeVisitor(object): 388 | """ 389 | A node visitor base class that walks the abstract syntax tree and calls a 390 | visitor function for every node found. This function may return a value 391 | which is forwarded by the `visit` method. 392 | 393 | This class is meant to be subclassed, with the subclass adding visitor 394 | methods. 395 | 396 | Per default the visitor functions for the nodes are ``'visit_'`` + 397 | class name of the node. So a `TryFinally` node visit function would 398 | be `visit_TryFinally`. This behavior can be changed by overriding 399 | the `visit` method. If no visitor function exists for a node 400 | (return value `None`) the `generic_visit` visitor is used instead. 401 | 402 | Don't use the `NodeVisitor` if you want to apply changes to nodes during 403 | traversing. For this a special visitor exists (`NodeTransformer`) that 404 | allows modifications. 
405 | """ 406 | 407 | def visit(self, node): 408 | """Visit a node.""" 409 | method = 'visit_' + node.__class__.__name__ 410 | visitor = getattr(self, method, self.generic_visit) 411 | return visitor(node) 412 | 413 | def generic_visit(self, node): 414 | """Called if no explicit visitor function exists for a node.""" 415 | for field, value in iter_fields(node): 416 | if isinstance(value, list): 417 | for item in value: 418 | if isinstance(item, AST): 419 | self.visit(item) 420 | elif isinstance(value, AST): 421 | self.visit(value) 422 | 423 | def visit_Constant(self, node): 424 | value = node.value 425 | type_name = _const_node_type_names.get(type(value)) 426 | if type_name is None: 427 | for cls, name in _const_node_type_names.items(): 428 | if isinstance(value, cls): 429 | type_name = name 430 | break 431 | if type_name is not None: 432 | method = 'visit_' + type_name 433 | try: 434 | visitor = getattr(self, method) 435 | except AttributeError: 436 | pass 437 | else: 438 | import warnings 439 | warnings.warn(f"{method} is deprecated; add visit_Constant", 440 | DeprecationWarning, 2) 441 | return visitor(node) 442 | return self.generic_visit(node) 443 | 444 | 445 | class NodeTransformer(NodeVisitor): 446 | """ 447 | A :class:`NodeVisitor` subclass that walks the abstract syntax tree and 448 | allows modification of nodes. 449 | 450 | The `NodeTransformer` will walk the AST and use the return value of the 451 | visitor methods to replace or remove the old node. If the return value of 452 | the visitor method is ``None``, the node will be removed from its location, 453 | otherwise it is replaced with the return value. The return value may be the 454 | original node in which case no replacement takes place. 455 | 456 | Here is an example transformer that rewrites all occurrences of name lookups 457 | (``foo``) to ``data['foo']``:: 458 | 459 | class RewriteName(NodeTransformer): 460 | 461 | def visit_Name(self, node): 462 | return Subscript( 463 | value=Name(id='data', ctx=Load()), 464 | slice=Constant(value=node.id), 465 | ctx=node.ctx 466 | ) 467 | 468 | Keep in mind that if the node you're operating on has child nodes you must 469 | either transform the child nodes yourself or call the :meth:`generic_visit` 470 | method for the node first. 471 | 472 | For nodes that were part of a collection of statements (that applies to all 473 | statement nodes), the visitor may also return a list of nodes rather than 474 | just a single node. 
475 | 476 | Usually you use the transformer like this:: 477 | 478 | node = YourTransformer().visit(node) 479 | """ 480 | 481 | def generic_visit(self, node): 482 | for field, old_value in iter_fields(node): 483 | if isinstance(old_value, list): 484 | new_values = [] 485 | for value in old_value: 486 | if isinstance(value, AST): 487 | value = self.visit(value) 488 | if value is None: 489 | continue 490 | elif not isinstance(value, AST): 491 | new_values.extend(value) 492 | continue 493 | new_values.append(value) 494 | old_value[:] = new_values 495 | elif isinstance(old_value, AST): 496 | new_node = self.visit(old_value) 497 | if new_node is None: 498 | delattr(node, field) 499 | else: 500 | setattr(node, field, new_node) 501 | return node 502 | 503 | 504 | _DEPRECATED_VALUE_ALIAS_MESSAGE = ( 505 | "{name} is deprecated and will be removed in Python {remove}; use value instead" 506 | ) 507 | _DEPRECATED_CLASS_MESSAGE = ( 508 | "{name} is deprecated and will be removed in Python {remove}; " 509 | "use ast.Constant instead" 510 | ) 511 | 512 | 513 | # If the ast module is loaded more than once, only add deprecated methods once 514 | if not hasattr(Constant, 'n'): 515 | # The following code is for backward compatibility. 516 | # It will be removed in future. 517 | 518 | def _n_getter(self): 519 | """Deprecated. Use value instead.""" 520 | import warnings 521 | warnings._deprecated( 522 | "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) 523 | ) 524 | return self.value 525 | 526 | def _n_setter(self, value): 527 | import warnings 528 | warnings._deprecated( 529 | "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) 530 | ) 531 | self.value = value 532 | 533 | def _s_getter(self): 534 | """Deprecated. Use value instead.""" 535 | import warnings 536 | warnings._deprecated( 537 | "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) 538 | ) 539 | return self.value 540 | 541 | def _s_setter(self, value): 542 | import warnings 543 | warnings._deprecated( 544 | "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) 545 | ) 546 | self.value = value 547 | 548 | Constant.n = property(_n_getter, _n_setter) 549 | Constant.s = property(_s_getter, _s_setter) 550 | 551 | class _ABC(type): 552 | 553 | def __init__(cls, *args): 554 | cls.__doc__ = """Deprecated AST node class. 
Use ast.Constant instead""" 555 | 556 | def __instancecheck__(cls, inst): 557 | if cls in _const_types: 558 | import warnings 559 | warnings._deprecated( 560 | f"ast.{cls.__qualname__}", 561 | message=_DEPRECATED_CLASS_MESSAGE, 562 | remove=(3, 14) 563 | ) 564 | if not isinstance(inst, Constant): 565 | return False 566 | if cls in _const_types: 567 | try: 568 | value = inst.value 569 | except AttributeError: 570 | return False 571 | else: 572 | return ( 573 | isinstance(value, _const_types[cls]) and 574 | not isinstance(value, _const_types_not.get(cls, ())) 575 | ) 576 | return type.__instancecheck__(cls, inst) 577 | 578 | def _new(cls, *args, **kwargs): 579 | for key in kwargs: 580 | if key not in cls._fields: 581 | # arbitrary keyword arguments are accepted 582 | continue 583 | pos = cls._fields.index(key) 584 | if pos < len(args): 585 | raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}") 586 | if cls in _const_types: 587 | import warnings 588 | warnings._deprecated( 589 | f"ast.{cls.__qualname__}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) 590 | ) 591 | return Constant(*args, **kwargs) 592 | return Constant.__new__(cls, *args, **kwargs) 593 | 594 | class Num(Constant, metaclass=_ABC): 595 | _fields = ('n',) 596 | __new__ = _new 597 | 598 | class Str(Constant, metaclass=_ABC): 599 | _fields = ('s',) 600 | __new__ = _new 601 | 602 | class Bytes(Constant, metaclass=_ABC): 603 | _fields = ('s',) 604 | __new__ = _new 605 | 606 | class NameConstant(Constant, metaclass=_ABC): 607 | __new__ = _new 608 | 609 | class Ellipsis(Constant, metaclass=_ABC): 610 | _fields = () 611 | 612 | def __new__(cls, *args, **kwargs): 613 | if cls is _ast_Ellipsis: 614 | import warnings 615 | warnings._deprecated( 616 | "ast.Ellipsis", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) 617 | ) 618 | return Constant(..., *args, **kwargs) 619 | return Constant.__new__(cls, *args, **kwargs) 620 | 621 | # Keep another reference to Ellipsis in the global namespace 622 | # so it can be referenced in Ellipsis.__new__ 623 | # (The original "Ellipsis" name is removed from the global namespace later on) 624 | _ast_Ellipsis = Ellipsis 625 | 626 | _const_types = { 627 | Num: (int, float, complex), 628 | Str: (str,), 629 | Bytes: (bytes,), 630 | NameConstant: (type(None), bool), 631 | Ellipsis: (type(...),), 632 | } 633 | _const_types_not = { 634 | Num: (bool,), 635 | } 636 | 637 | _const_node_type_names = { 638 | bool: 'NameConstant', # should be before int 639 | type(None): 'NameConstant', 640 | int: 'Num', 641 | float: 'Num', 642 | complex: 'Num', 643 | str: 'Str', 644 | bytes: 'Bytes', 645 | type(...): 'Ellipsis', 646 | } 647 | 648 | class slice(AST): 649 | """Deprecated AST node class.""" 650 | 651 | class Index(slice): 652 | """Deprecated AST node class. Use the index value directly instead.""" 653 | def __new__(cls, value, **kwargs): 654 | return value 655 | 656 | class ExtSlice(slice): 657 | """Deprecated AST node class. Use ast.Tuple instead.""" 658 | def __new__(cls, dims=(), **kwargs): 659 | return Tuple(list(dims), Load(), **kwargs) 660 | 661 | # If the ast module is loaded more than once, only add deprecated methods once 662 | if not hasattr(Tuple, 'dims'): 663 | # The following code is for backward compatibility. 664 | # It will be removed in future. 665 | 666 | def _dims_getter(self): 667 | """Deprecated. 
Use elts instead.""" 668 | return self.elts 669 | 670 | def _dims_setter(self, value): 671 | self.elts = value 672 | 673 | Tuple.dims = property(_dims_getter, _dims_setter) 674 | 675 | class Suite(mod): 676 | """Deprecated AST node class. Unused in Python 3.""" 677 | 678 | class AugLoad(expr_context): 679 | """Deprecated AST node class. Unused in Python 3.""" 680 | 681 | class AugStore(expr_context): 682 | """Deprecated AST node class. Unused in Python 3.""" 683 | 684 | class Param(expr_context): 685 | """Deprecated AST node class. Unused in Python 3.""" 686 | 687 | 688 | # Large float and imaginary literals get turned into infinities in the AST. 689 | # We unparse those infinities to INFSTR. 690 | _INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) 691 | 692 | @_simple_enum(IntEnum) 693 | class _Precedence: 694 | """Precedence table that originated from python grammar.""" 695 | 696 | NAMED_EXPR = auto() # := 697 | TUPLE = auto() # , 698 | YIELD = auto() # 'yield', 'yield from' 699 | TEST = auto() # 'if'-'else', 'lambda' 700 | OR = auto() # 'or' 701 | AND = auto() # 'and' 702 | NOT = auto() # 'not' 703 | CMP = auto() # '<', '>', '==', '>=', '<=', '!=', 704 | # 'in', 'not in', 'is', 'is not' 705 | EXPR = auto() 706 | BOR = EXPR # '|' 707 | BXOR = auto() # '^' 708 | BAND = auto() # '&' 709 | SHIFT = auto() # '<<', '>>' 710 | ARITH = auto() # '+', '-' 711 | TERM = auto() # '*', '@', '/', '%', '//' 712 | FACTOR = auto() # unary '+', '-', '~' 713 | POWER = auto() # '**' 714 | AWAIT = auto() # 'await' 715 | ATOM = auto() 716 | 717 | def next(self): 718 | try: 719 | return self.__class__(self + 1) 720 | except ValueError: 721 | return self 722 | 723 | 724 | _SINGLE_QUOTES = ("'", '"') 725 | _MULTI_QUOTES = ('"""', "'''") 726 | _ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) 727 | 728 | class _Unparser(NodeVisitor): 729 | """Methods in this class recursively traverse an AST and 730 | output source code for the abstract syntax; original formatting 731 | is disregarded.""" 732 | 733 | def __init__(self, *, _avoid_backslashes=False): 734 | self._source = [] 735 | self._precedences = {} 736 | self._type_ignores = {} 737 | self._indent = 0 738 | self._avoid_backslashes = _avoid_backslashes 739 | self._in_try_star = False 740 | 741 | def interleave(self, inter, f, seq): 742 | """Call f on each item in seq, calling inter() in between.""" 743 | seq = iter(seq) 744 | try: 745 | f(next(seq)) 746 | except StopIteration: 747 | pass 748 | else: 749 | for x in seq: 750 | inter() 751 | f(x) 752 | 753 | def items_view(self, traverser, items): 754 | """Traverse and separate the given *items* with a comma and append it to 755 | the buffer. 
If *items* is a single item sequence, a trailing comma 756 | will be added.""" 757 | if len(items) == 1: 758 | traverser(items[0]) 759 | self.write(",") 760 | else: 761 | self.interleave(lambda: self.write(", "), traverser, items) 762 | 763 | def maybe_newline(self): 764 | """Adds a newline if it isn't the start of generated source""" 765 | if self._source: 766 | self.write("\n") 767 | 768 | def fill(self, text=""): 769 | """Indent a piece of text and append it, according to the current 770 | indentation level""" 771 | self.maybe_newline() 772 | self.write(" " * self._indent + text) 773 | 774 | def write(self, *text): 775 | """Add new source parts""" 776 | self._source.extend(text) 777 | 778 | @contextmanager 779 | def buffered(self, buffer = None): 780 | if buffer is None: 781 | buffer = [] 782 | 783 | original_source = self._source 784 | self._source = buffer 785 | yield buffer 786 | self._source = original_source 787 | 788 | @contextmanager 789 | def block(self, *, extra = None): 790 | """A context manager for preparing the source for blocks. It adds 791 | the character':', increases the indentation on enter and decreases 792 | the indentation on exit. If *extra* is given, it will be directly 793 | appended after the colon character. 794 | """ 795 | self.write(":") 796 | if extra: 797 | self.write(extra) 798 | self._indent += 1 799 | yield 800 | self._indent -= 1 801 | 802 | @contextmanager 803 | def delimit(self, start, end): 804 | """A context manager for preparing the source for expressions. It adds 805 | *start* to the buffer and enters, after exit it adds *end*.""" 806 | 807 | self.write(start) 808 | yield 809 | self.write(end) 810 | 811 | def delimit_if(self, start, end, condition): 812 | if condition: 813 | return self.delimit(start, end) 814 | else: 815 | return nullcontext() 816 | 817 | def require_parens(self, precedence, node): 818 | """Shortcut to adding precedence related parens""" 819 | return self.delimit_if("(", ")", self.get_precedence(node) > precedence) 820 | 821 | def get_precedence(self, node): 822 | return self._precedences.get(node, _Precedence.TEST) 823 | 824 | def set_precedence(self, precedence, *nodes): 825 | for node in nodes: 826 | self._precedences[node] = precedence 827 | 828 | def get_raw_docstring(self, node): 829 | """If a docstring node is found in the body of the *node* parameter, 830 | return that docstring node, None otherwise. 
831 | 832 | Logic mirrored from ``_PyAST_GetDocString``.""" 833 | if not isinstance( 834 | node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) 835 | ) or len(node.body) < 1: 836 | return None 837 | node = node.body[0] 838 | if not isinstance(node, Expr): 839 | return None 840 | node = node.value 841 | if isinstance(node, Constant) and isinstance(node.value, str): 842 | return node 843 | 844 | def get_type_comment(self, node): 845 | comment = self._type_ignores.get(node.lineno) or node.type_comment 846 | if comment is not None: 847 | return f" # type: {comment}" 848 | 849 | def traverse(self, node): 850 | if isinstance(node, list): 851 | for item in node: 852 | self.traverse(item) 853 | else: 854 | super().visit(node) 855 | 856 | # Note: as visit() resets the output text, do NOT rely on 857 | # NodeVisitor.generic_visit to handle any nodes (as it calls back in to 858 | # the subclass visit() method, which resets self._source to an empty list) 859 | def visit(self, node): 860 | """Outputs a source code string that, if converted back to an ast 861 | (using ast.parse) will generate an AST equivalent to *node*""" 862 | self._source = [] 863 | self.traverse(node) 864 | return "".join(self._source) 865 | 866 | def _write_docstring_and_traverse_body(self, node): 867 | if (docstring := self.get_raw_docstring(node)): 868 | self._write_docstring(docstring) 869 | self.traverse(node.body[1:]) 870 | else: 871 | self.traverse(node.body) 872 | 873 | def visit_Module(self, node): 874 | self._type_ignores = { 875 | ignore.lineno: f"ignore{ignore.tag}" 876 | for ignore in node.type_ignores 877 | } 878 | self._write_docstring_and_traverse_body(node) 879 | self._type_ignores.clear() 880 | 881 | def visit_FunctionType(self, node): 882 | with self.delimit("(", ")"): 883 | self.interleave( 884 | lambda: self.write(", "), self.traverse, node.argtypes 885 | ) 886 | 887 | self.write(" -> ") 888 | self.traverse(node.returns) 889 | 890 | def visit_Expr(self, node): 891 | self.fill() 892 | self.set_precedence(_Precedence.YIELD, node.value) 893 | self.traverse(node.value) 894 | 895 | def visit_NamedExpr(self, node): 896 | with self.require_parens(_Precedence.NAMED_EXPR, node): 897 | self.set_precedence(_Precedence.ATOM, node.target, node.value) 898 | self.traverse(node.target) 899 | self.write(" := ") 900 | self.traverse(node.value) 901 | 902 | def visit_Import(self, node): 903 | self.fill("import ") 904 | self.interleave(lambda: self.write(", "), self.traverse, node.names) 905 | 906 | def visit_ImportFrom(self, node): 907 | self.fill("from ") 908 | self.write("." 
* (node.level or 0)) 909 | if node.module: 910 | self.write(node.module) 911 | self.write(" import ") 912 | self.interleave(lambda: self.write(", "), self.traverse, node.names) 913 | 914 | def visit_Assign(self, node): 915 | self.fill() 916 | for target in node.targets: 917 | self.set_precedence(_Precedence.TUPLE, target) 918 | self.traverse(target) 919 | self.write(" = ") 920 | self.traverse(node.value) 921 | if type_comment := self.get_type_comment(node): 922 | self.write(type_comment) 923 | 924 | def visit_AugAssign(self, node): 925 | self.fill() 926 | self.traverse(node.target) 927 | self.write(" " + self.binop[node.op.__class__.__name__] + "= ") 928 | self.traverse(node.value) 929 | 930 | def visit_AnnAssign(self, node): 931 | self.fill() 932 | with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): 933 | self.traverse(node.target) 934 | self.write(": ") 935 | self.traverse(node.annotation) 936 | if node.value: 937 | self.write(" = ") 938 | self.traverse(node.value) 939 | 940 | def visit_Return(self, node): 941 | self.fill("return") 942 | if node.value: 943 | self.write(" ") 944 | self.traverse(node.value) 945 | 946 | def visit_Pass(self, node): 947 | self.fill("pass") 948 | 949 | def visit_Break(self, node): 950 | self.fill("break") 951 | 952 | def visit_Continue(self, node): 953 | self.fill("continue") 954 | 955 | def visit_Delete(self, node): 956 | self.fill("del ") 957 | self.interleave(lambda: self.write(", "), self.traverse, node.targets) 958 | 959 | def visit_Assert(self, node): 960 | self.fill("assert ") 961 | self.traverse(node.test) 962 | if node.msg: 963 | self.write(", ") 964 | self.traverse(node.msg) 965 | 966 | def visit_Global(self, node): 967 | self.fill("global ") 968 | self.interleave(lambda: self.write(", "), self.write, node.names) 969 | 970 | def visit_Nonlocal(self, node): 971 | self.fill("nonlocal ") 972 | self.interleave(lambda: self.write(", "), self.write, node.names) 973 | 974 | def visit_Await(self, node): 975 | with self.require_parens(_Precedence.AWAIT, node): 976 | self.write("await") 977 | if node.value: 978 | self.write(" ") 979 | self.set_precedence(_Precedence.ATOM, node.value) 980 | self.traverse(node.value) 981 | 982 | def visit_Yield(self, node): 983 | with self.require_parens(_Precedence.YIELD, node): 984 | self.write("yield") 985 | if node.value: 986 | self.write(" ") 987 | self.set_precedence(_Precedence.ATOM, node.value) 988 | self.traverse(node.value) 989 | 990 | def visit_YieldFrom(self, node): 991 | with self.require_parens(_Precedence.YIELD, node): 992 | self.write("yield from ") 993 | if not node.value: 994 | raise ValueError("Node can't be used without a value attribute.") 995 | self.set_precedence(_Precedence.ATOM, node.value) 996 | self.traverse(node.value) 997 | 998 | def visit_Raise(self, node): 999 | self.fill("raise") 1000 | if not node.exc: 1001 | if node.cause: 1002 | raise ValueError(f"Node can't use cause without an exception.") 1003 | return 1004 | self.write(" ") 1005 | self.traverse(node.exc) 1006 | if node.cause: 1007 | self.write(" from ") 1008 | self.traverse(node.cause) 1009 | 1010 | def do_visit_try(self, node): 1011 | self.fill("try") 1012 | with self.block(): 1013 | self.traverse(node.body) 1014 | for ex in node.handlers: 1015 | self.traverse(ex) 1016 | if node.orelse: 1017 | self.fill("else") 1018 | with self.block(): 1019 | self.traverse(node.orelse) 1020 | if node.finalbody: 1021 | self.fill("finally") 1022 | with self.block(): 1023 | self.traverse(node.finalbody) 1024 | 1025 | def 
visit_Try(self, node): 1026 | prev_in_try_star = self._in_try_star 1027 | try: 1028 | self._in_try_star = False 1029 | self.do_visit_try(node) 1030 | finally: 1031 | self._in_try_star = prev_in_try_star 1032 | 1033 | def visit_TryStar(self, node): 1034 | prev_in_try_star = self._in_try_star 1035 | try: 1036 | self._in_try_star = True 1037 | self.do_visit_try(node) 1038 | finally: 1039 | self._in_try_star = prev_in_try_star 1040 | 1041 | def visit_ExceptHandler(self, node): 1042 | self.fill("except*" if self._in_try_star else "except") 1043 | if node.type: 1044 | self.write(" ") 1045 | self.traverse(node.type) 1046 | if node.name: 1047 | self.write(" as ") 1048 | self.write(node.name) 1049 | with self.block(): 1050 | self.traverse(node.body) 1051 | 1052 | def visit_ClassDef(self, node): 1053 | self.maybe_newline() 1054 | for deco in node.decorator_list: 1055 | self.fill("@") 1056 | self.traverse(deco) 1057 | self.fill("class " + node.name) 1058 | if hasattr(node, "type_params"): 1059 | self._type_params_helper(node.type_params) 1060 | with self.delimit_if("(", ")", condition = node.bases or node.keywords): 1061 | comma = False 1062 | for e in node.bases: 1063 | if comma: 1064 | self.write(", ") 1065 | else: 1066 | comma = True 1067 | self.traverse(e) 1068 | for e in node.keywords: 1069 | if comma: 1070 | self.write(", ") 1071 | else: 1072 | comma = True 1073 | self.traverse(e) 1074 | 1075 | with self.block(): 1076 | self._write_docstring_and_traverse_body(node) 1077 | 1078 | def visit_FunctionDef(self, node): 1079 | self._function_helper(node, "def") 1080 | 1081 | def visit_AsyncFunctionDef(self, node): 1082 | self._function_helper(node, "async def") 1083 | 1084 | def _function_helper(self, node, fill_suffix): 1085 | self.maybe_newline() 1086 | for deco in node.decorator_list: 1087 | self.fill("@") 1088 | self.traverse(deco) 1089 | def_str = fill_suffix + " " + node.name 1090 | self.fill(def_str) 1091 | if hasattr(node, "type_params"): 1092 | self._type_params_helper(node.type_params) 1093 | with self.delimit("(", ")"): 1094 | self.traverse(node.args) 1095 | if node.returns: 1096 | self.write(" -> ") 1097 | self.traverse(node.returns) 1098 | with self.block(extra=self.get_type_comment(node)): 1099 | self._write_docstring_and_traverse_body(node) 1100 | 1101 | def _type_params_helper(self, type_params): 1102 | if type_params is not None and len(type_params) > 0: 1103 | with self.delimit("[", "]"): 1104 | self.interleave(lambda: self.write(", "), self.traverse, type_params) 1105 | 1106 | def visit_TypeVar(self, node): 1107 | self.write(node.name) 1108 | if node.bound: 1109 | self.write(": ") 1110 | self.traverse(node.bound) 1111 | 1112 | def visit_TypeVarTuple(self, node): 1113 | self.write("*" + node.name) 1114 | 1115 | def visit_ParamSpec(self, node): 1116 | self.write("**" + node.name) 1117 | 1118 | def visit_TypeAlias(self, node): 1119 | self.fill("type ") 1120 | self.traverse(node.name) 1121 | self._type_params_helper(node.type_params) 1122 | self.write(" = ") 1123 | self.traverse(node.value) 1124 | 1125 | def visit_For(self, node): 1126 | self._for_helper("for ", node) 1127 | 1128 | def visit_AsyncFor(self, node): 1129 | self._for_helper("async for ", node) 1130 | 1131 | def _for_helper(self, fill, node): 1132 | self.fill(fill) 1133 | self.set_precedence(_Precedence.TUPLE, node.target) 1134 | self.traverse(node.target) 1135 | self.write(" in ") 1136 | self.traverse(node.iter) 1137 | with self.block(extra=self.get_type_comment(node)): 1138 | self.traverse(node.body) 1139 | if node.orelse: 
1140 | self.fill("else") 1141 | with self.block(): 1142 | self.traverse(node.orelse) 1143 | 1144 | def visit_If(self, node): 1145 | self.fill("if ") 1146 | self.traverse(node.test) 1147 | with self.block(): 1148 | self.traverse(node.body) 1149 | # collapse nested ifs into equivalent elifs. 1150 | while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): 1151 | node = node.orelse[0] 1152 | self.fill("elif ") 1153 | self.traverse(node.test) 1154 | with self.block(): 1155 | self.traverse(node.body) 1156 | # final else 1157 | if node.orelse: 1158 | self.fill("else") 1159 | with self.block(): 1160 | self.traverse(node.orelse) 1161 | 1162 | def visit_While(self, node): 1163 | self.fill("while ") 1164 | self.traverse(node.test) 1165 | with self.block(): 1166 | self.traverse(node.body) 1167 | if node.orelse: 1168 | self.fill("else") 1169 | with self.block(): 1170 | self.traverse(node.orelse) 1171 | 1172 | def visit_With(self, node): 1173 | self.fill("with ") 1174 | self.interleave(lambda: self.write(", "), self.traverse, node.items) 1175 | with self.block(extra=self.get_type_comment(node)): 1176 | self.traverse(node.body) 1177 | 1178 | def visit_AsyncWith(self, node): 1179 | self.fill("async with ") 1180 | self.interleave(lambda: self.write(", "), self.traverse, node.items) 1181 | with self.block(extra=self.get_type_comment(node)): 1182 | self.traverse(node.body) 1183 | 1184 | def _str_literal_helper( 1185 | self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False 1186 | ): 1187 | """Helper for writing string literals, minimizing escapes. 1188 | Returns the tuple (string literal to write, possible quote types). 1189 | """ 1190 | def escape_char(c): 1191 | # \n and \t are non-printable, but we only escape them if 1192 | # escape_special_whitespace is True 1193 | if not escape_special_whitespace and c in "\n\t": 1194 | return c 1195 | # Always escape backslashes and other non-printable characters 1196 | if c == "\\" or not c.isprintable(): 1197 | return c.encode("unicode_escape").decode("ascii") 1198 | return c 1199 | 1200 | escaped_string = "".join(map(escape_char, string)) 1201 | possible_quotes = quote_types 1202 | if "\n" in escaped_string: 1203 | possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] 1204 | possible_quotes = [q for q in possible_quotes if q not in escaped_string] 1205 | if not possible_quotes: 1206 | # If there aren't any possible_quotes, fallback to using repr 1207 | # on the original string. Try to use a quote from quote_types, 1208 | # e.g., so that we use triple quotes for docstrings. 
1209 | string = repr(string) 1210 | quote = next((q for q in quote_types if string[0] in q), string[0]) 1211 | return string[1:-1], [quote] 1212 | if escaped_string: 1213 | # Sort so that we prefer '''"''' over """\"""" 1214 | possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) 1215 | # If we're using triple quotes and we'd need to escape a final 1216 | # quote, escape it 1217 | if possible_quotes[0][0] == escaped_string[-1]: 1218 | assert len(possible_quotes[0]) == 3 1219 | escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] 1220 | return escaped_string, possible_quotes 1221 | 1222 | def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): 1223 | """Write string literal value with a best effort attempt to avoid backslashes.""" 1224 | string, quote_types = self._str_literal_helper(string, quote_types=quote_types) 1225 | quote_type = quote_types[0] 1226 | self.write(f"{quote_type}{string}{quote_type}") 1227 | 1228 | def visit_JoinedStr(self, node): 1229 | self.write("f") 1230 | 1231 | fstring_parts = [] 1232 | for value in node.values: 1233 | with self.buffered() as buffer: 1234 | self._write_fstring_inner(value) 1235 | fstring_parts.append( 1236 | ("".join(buffer), isinstance(value, Constant)) 1237 | ) 1238 | 1239 | new_fstring_parts = [] 1240 | quote_types = list(_ALL_QUOTES) 1241 | for value, is_constant in fstring_parts: 1242 | if is_constant: 1243 | value, quote_types = self._str_literal_helper( 1244 | value, 1245 | quote_types=quote_types, 1246 | escape_special_whitespace=True, 1247 | ) 1248 | elif "\n" in value: 1249 | quote_types = [q for q in quote_types if q in _MULTI_QUOTES] 1250 | new_fstring_parts.append(value) 1251 | 1252 | value = "".join(new_fstring_parts) 1253 | quote_type = quote_types[0] 1254 | self.write(f"{quote_type}{value}{quote_type}") 1255 | 1256 | def _write_fstring_inner(self, node): 1257 | if isinstance(node, JoinedStr): 1258 | # for both the f-string itself, and format_spec 1259 | for value in node.values: 1260 | self._write_fstring_inner(value) 1261 | elif isinstance(node, Constant) and isinstance(node.value, str): 1262 | value = node.value.replace("{", "{{").replace("}", "}}") 1263 | self.write(value) 1264 | elif isinstance(node, FormattedValue): 1265 | self.visit_FormattedValue(node) 1266 | else: 1267 | raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") 1268 | 1269 | def visit_FormattedValue(self, node): 1270 | def unparse_inner(inner): 1271 | unparser = type(self)() 1272 | unparser.set_precedence(_Precedence.TEST.next(), inner) 1273 | return unparser.visit(inner) 1274 | 1275 | with self.delimit("{", "}"): 1276 | expr = unparse_inner(node.value) 1277 | if expr.startswith("{"): 1278 | # Separate pair of opening brackets as "{ {" 1279 | self.write(" ") 1280 | self.write(expr) 1281 | if node.conversion != -1: 1282 | self.write(f"!{chr(node.conversion)}") 1283 | if node.format_spec: 1284 | self.write(":") 1285 | self._write_fstring_inner(node.format_spec) 1286 | 1287 | def visit_Name(self, node): 1288 | self.write(node.id) 1289 | 1290 | def _write_docstring(self, node): 1291 | self.fill() 1292 | if node.kind == "u": 1293 | self.write("u") 1294 | self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) 1295 | 1296 | def _write_constant(self, value): 1297 | if isinstance(value, (float, complex)): 1298 | # Substitute overflowing decimal literal for AST infinities, 1299 | # and inf - inf for NaNs. 
1300 | self.write( 1301 | repr(value) 1302 | .replace("inf", _INFSTR) 1303 | .replace("nan", f"({_INFSTR}-{_INFSTR})") 1304 | ) 1305 | elif self._avoid_backslashes and isinstance(value, str): 1306 | self._write_str_avoiding_backslashes(value) 1307 | else: 1308 | self.write(repr(value)) 1309 | 1310 | def visit_Constant(self, node): 1311 | value = node.value 1312 | if isinstance(value, tuple): 1313 | with self.delimit("(", ")"): 1314 | self.items_view(self._write_constant, value) 1315 | elif value is ...: 1316 | self.write("...") 1317 | else: 1318 | if node.kind == "u": 1319 | self.write("u") 1320 | self._write_constant(node.value) 1321 | 1322 | def visit_List(self, node): 1323 | with self.delimit("[", "]"): 1324 | self.interleave(lambda: self.write(", "), self.traverse, node.elts) 1325 | 1326 | def visit_ListComp(self, node): 1327 | with self.delimit("[", "]"): 1328 | self.traverse(node.elt) 1329 | for gen in node.generators: 1330 | self.traverse(gen) 1331 | 1332 | def visit_GeneratorExp(self, node): 1333 | with self.delimit("(", ")"): 1334 | self.traverse(node.elt) 1335 | for gen in node.generators: 1336 | self.traverse(gen) 1337 | 1338 | def visit_SetComp(self, node): 1339 | with self.delimit("{", "}"): 1340 | self.traverse(node.elt) 1341 | for gen in node.generators: 1342 | self.traverse(gen) 1343 | 1344 | def visit_DictComp(self, node): 1345 | with self.delimit("{", "}"): 1346 | self.traverse(node.key) 1347 | self.write(": ") 1348 | self.traverse(node.value) 1349 | for gen in node.generators: 1350 | self.traverse(gen) 1351 | 1352 | def visit_comprehension(self, node): 1353 | if node.is_async: 1354 | self.write(" async for ") 1355 | else: 1356 | self.write(" for ") 1357 | self.set_precedence(_Precedence.TUPLE, node.target) 1358 | self.traverse(node.target) 1359 | self.write(" in ") 1360 | self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) 1361 | self.traverse(node.iter) 1362 | for if_clause in node.ifs: 1363 | self.write(" if ") 1364 | self.traverse(if_clause) 1365 | 1366 | def visit_IfExp(self, node): 1367 | with self.require_parens(_Precedence.TEST, node): 1368 | self.set_precedence(_Precedence.TEST.next(), node.body, node.test) 1369 | self.traverse(node.body) 1370 | self.write(" if ") 1371 | self.traverse(node.test) 1372 | self.write(" else ") 1373 | self.set_precedence(_Precedence.TEST, node.orelse) 1374 | self.traverse(node.orelse) 1375 | 1376 | def visit_Set(self, node): 1377 | if node.elts: 1378 | with self.delimit("{", "}"): 1379 | self.interleave(lambda: self.write(", "), self.traverse, node.elts) 1380 | else: 1381 | # `{}` would be interpreted as a dictionary literal, and 1382 | # `set` might be shadowed. 
Thus: 1383 | self.write('{*()}') 1384 | 1385 | def visit_Dict(self, node): 1386 | def write_key_value_pair(k, v): 1387 | self.traverse(k) 1388 | self.write(": ") 1389 | self.traverse(v) 1390 | 1391 | def write_item(item): 1392 | k, v = item 1393 | if k is None: 1394 | # for dictionary unpacking operator in dicts {**{'y': 2}} 1395 | # see PEP 448 for details 1396 | self.write("**") 1397 | self.set_precedence(_Precedence.EXPR, v) 1398 | self.traverse(v) 1399 | else: 1400 | write_key_value_pair(k, v) 1401 | 1402 | with self.delimit("{", "}"): 1403 | self.interleave( 1404 | lambda: self.write(", "), write_item, zip(node.keys, node.values) 1405 | ) 1406 | 1407 | def visit_Tuple(self, node): 1408 | with self.delimit_if( 1409 | "(", 1410 | ")", 1411 | len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE 1412 | ): 1413 | self.items_view(self.traverse, node.elts) 1414 | 1415 | unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} 1416 | unop_precedence = { 1417 | "not": _Precedence.NOT, 1418 | "~": _Precedence.FACTOR, 1419 | "+": _Precedence.FACTOR, 1420 | "-": _Precedence.FACTOR, 1421 | } 1422 | 1423 | def visit_UnaryOp(self, node): 1424 | operator = self.unop[node.op.__class__.__name__] 1425 | operator_precedence = self.unop_precedence[operator] 1426 | with self.require_parens(operator_precedence, node): 1427 | self.write(operator) 1428 | # factor prefixes (+, -, ~) shouldn't be separated 1429 | # from the value they belong, (e.g: +1 instead of + 1) 1430 | if operator_precedence is not _Precedence.FACTOR: 1431 | self.write(" ") 1432 | self.set_precedence(operator_precedence, node.operand) 1433 | self.traverse(node.operand) 1434 | 1435 | binop = { 1436 | "Add": "+", 1437 | "Sub": "-", 1438 | "Mult": "*", 1439 | "MatMult": "@", 1440 | "Div": "/", 1441 | "Mod": "%", 1442 | "LShift": "<<", 1443 | "RShift": ">>", 1444 | "BitOr": "|", 1445 | "BitXor": "^", 1446 | "BitAnd": "&", 1447 | "FloorDiv": "//", 1448 | "Pow": "**", 1449 | } 1450 | 1451 | binop_precedence = { 1452 | "+": _Precedence.ARITH, 1453 | "-": _Precedence.ARITH, 1454 | "*": _Precedence.TERM, 1455 | "@": _Precedence.TERM, 1456 | "/": _Precedence.TERM, 1457 | "%": _Precedence.TERM, 1458 | "<<": _Precedence.SHIFT, 1459 | ">>": _Precedence.SHIFT, 1460 | "|": _Precedence.BOR, 1461 | "^": _Precedence.BXOR, 1462 | "&": _Precedence.BAND, 1463 | "//": _Precedence.TERM, 1464 | "**": _Precedence.POWER, 1465 | } 1466 | 1467 | binop_rassoc = frozenset(("**",)) 1468 | def visit_BinOp(self, node): 1469 | operator = self.binop[node.op.__class__.__name__] 1470 | operator_precedence = self.binop_precedence[operator] 1471 | with self.require_parens(operator_precedence, node): 1472 | if operator in self.binop_rassoc: 1473 | left_precedence = operator_precedence.next() 1474 | right_precedence = operator_precedence 1475 | else: 1476 | left_precedence = operator_precedence 1477 | right_precedence = operator_precedence.next() 1478 | 1479 | self.set_precedence(left_precedence, node.left) 1480 | self.traverse(node.left) 1481 | self.write(f" {operator} ") 1482 | self.set_precedence(right_precedence, node.right) 1483 | self.traverse(node.right) 1484 | 1485 | cmpops = { 1486 | "Eq": "==", 1487 | "NotEq": "!=", 1488 | "Lt": "<", 1489 | "LtE": "<=", 1490 | "Gt": ">", 1491 | "GtE": ">=", 1492 | "Is": "is", 1493 | "IsNot": "is not", 1494 | "In": "in", 1495 | "NotIn": "not in", 1496 | } 1497 | 1498 | def visit_Compare(self, node): 1499 | with self.require_parens(_Precedence.CMP, node): 1500 | self.set_precedence(_Precedence.CMP.next(), node.left, 
*node.comparators) 1501 | self.traverse(node.left) 1502 | for o, e in zip(node.ops, node.comparators): 1503 | self.write(" " + self.cmpops[o.__class__.__name__] + " ") 1504 | self.traverse(e) 1505 | 1506 | boolops = {"And": "and", "Or": "or"} 1507 | boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} 1508 | 1509 | def visit_BoolOp(self, node): 1510 | operator = self.boolops[node.op.__class__.__name__] 1511 | operator_precedence = self.boolop_precedence[operator] 1512 | 1513 | def increasing_level_traverse(node): 1514 | nonlocal operator_precedence 1515 | operator_precedence = operator_precedence.next() 1516 | self.set_precedence(operator_precedence, node) 1517 | self.traverse(node) 1518 | 1519 | with self.require_parens(operator_precedence, node): 1520 | s = f" {operator} " 1521 | self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) 1522 | 1523 | def visit_Attribute(self, node): 1524 | self.set_precedence(_Precedence.ATOM, node.value) 1525 | self.traverse(node.value) 1526 | # Special case: 3.__abs__() is a syntax error, so if node.value 1527 | # is an integer literal then we need to either parenthesize 1528 | # it or add an extra space to get 3 .__abs__(). 1529 | if isinstance(node.value, Constant) and isinstance(node.value.value, int): 1530 | self.write(" ") 1531 | self.write(".") 1532 | self.write(node.attr) 1533 | 1534 | def visit_Call(self, node): 1535 | self.set_precedence(_Precedence.ATOM, node.func) 1536 | self.traverse(node.func) 1537 | with self.delimit("(", ")"): 1538 | comma = False 1539 | for e in node.args: 1540 | if comma: 1541 | self.write(", ") 1542 | else: 1543 | comma = True 1544 | self.traverse(e) 1545 | for e in node.keywords: 1546 | if comma: 1547 | self.write(", ") 1548 | else: 1549 | comma = True 1550 | self.traverse(e) 1551 | 1552 | def visit_Subscript(self, node): 1553 | def is_non_empty_tuple(slice_value): 1554 | return ( 1555 | isinstance(slice_value, Tuple) 1556 | and slice_value.elts 1557 | ) 1558 | 1559 | self.set_precedence(_Precedence.ATOM, node.value) 1560 | self.traverse(node.value) 1561 | with self.delimit("[", "]"): 1562 | if is_non_empty_tuple(node.slice): 1563 | # parentheses can be omitted if the tuple isn't empty 1564 | self.items_view(self.traverse, node.slice.elts) 1565 | else: 1566 | self.traverse(node.slice) 1567 | 1568 | def visit_Starred(self, node): 1569 | self.write("*") 1570 | self.set_precedence(_Precedence.EXPR, node.value) 1571 | self.traverse(node.value) 1572 | 1573 | def visit_Ellipsis(self, node): 1574 | self.write("...") 1575 | 1576 | def visit_Slice(self, node): 1577 | if node.lower: 1578 | self.traverse(node.lower) 1579 | self.write(":") 1580 | if node.upper: 1581 | self.traverse(node.upper) 1582 | if node.step: 1583 | self.write(":") 1584 | self.traverse(node.step) 1585 | 1586 | def visit_Match(self, node): 1587 | self.fill("match ") 1588 | self.traverse(node.subject) 1589 | with self.block(): 1590 | for case in node.cases: 1591 | self.traverse(case) 1592 | 1593 | def visit_arg(self, node): 1594 | self.write(node.arg) 1595 | if node.annotation: 1596 | self.write(": ") 1597 | self.traverse(node.annotation) 1598 | 1599 | def visit_arguments(self, node): 1600 | first = True 1601 | # normal arguments 1602 | all_args = node.posonlyargs + node.args 1603 | defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults 1604 | for index, elements in enumerate(zip(all_args, defaults), 1): 1605 | a, d = elements 1606 | if first: 1607 | first = False 1608 | else: 1609 | self.write(", ") 1610 | 
self.traverse(a) 1611 | if d: 1612 | self.write("=") 1613 | self.traverse(d) 1614 | if index == len(node.posonlyargs): 1615 | self.write(", /") 1616 | 1617 | # varargs, or bare '*' if no varargs but keyword-only arguments present 1618 | if node.vararg or node.kwonlyargs: 1619 | if first: 1620 | first = False 1621 | else: 1622 | self.write(", ") 1623 | self.write("*") 1624 | if node.vararg: 1625 | self.write(node.vararg.arg) 1626 | if node.vararg.annotation: 1627 | self.write(": ") 1628 | self.traverse(node.vararg.annotation) 1629 | 1630 | # keyword-only arguments 1631 | if node.kwonlyargs: 1632 | for a, d in zip(node.kwonlyargs, node.kw_defaults): 1633 | self.write(", ") 1634 | self.traverse(a) 1635 | if d: 1636 | self.write("=") 1637 | self.traverse(d) 1638 | 1639 | # kwargs 1640 | if node.kwarg: 1641 | if first: 1642 | first = False 1643 | else: 1644 | self.write(", ") 1645 | self.write("**" + node.kwarg.arg) 1646 | if node.kwarg.annotation: 1647 | self.write(": ") 1648 | self.traverse(node.kwarg.annotation) 1649 | 1650 | def visit_keyword(self, node): 1651 | if node.arg is None: 1652 | self.write("**") 1653 | else: 1654 | self.write(node.arg) 1655 | self.write("=") 1656 | self.traverse(node.value) 1657 | 1658 | def visit_Lambda(self, node): 1659 | with self.require_parens(_Precedence.TEST, node): 1660 | self.write("lambda") 1661 | with self.buffered() as buffer: 1662 | self.traverse(node.args) 1663 | if buffer: 1664 | self.write(" ", *buffer) 1665 | self.write(": ") 1666 | self.set_precedence(_Precedence.TEST, node.body) 1667 | self.traverse(node.body) 1668 | 1669 | def visit_alias(self, node): 1670 | self.write(node.name) 1671 | if node.asname: 1672 | self.write(" as " + node.asname) 1673 | 1674 | def visit_withitem(self, node): 1675 | self.traverse(node.context_expr) 1676 | if node.optional_vars: 1677 | self.write(" as ") 1678 | self.traverse(node.optional_vars) 1679 | 1680 | def visit_match_case(self, node): 1681 | self.fill("case ") 1682 | self.traverse(node.pattern) 1683 | if node.guard: 1684 | self.write(" if ") 1685 | self.traverse(node.guard) 1686 | with self.block(): 1687 | self.traverse(node.body) 1688 | 1689 | def visit_MatchValue(self, node): 1690 | self.traverse(node.value) 1691 | 1692 | def visit_MatchSingleton(self, node): 1693 | self._write_constant(node.value) 1694 | 1695 | def visit_MatchSequence(self, node): 1696 | with self.delimit("[", "]"): 1697 | self.interleave( 1698 | lambda: self.write(", "), self.traverse, node.patterns 1699 | ) 1700 | 1701 | def visit_MatchStar(self, node): 1702 | name = node.name 1703 | if name is None: 1704 | name = "_" 1705 | self.write(f"*{name}") 1706 | 1707 | def visit_MatchMapping(self, node): 1708 | def write_key_pattern_pair(pair): 1709 | k, p = pair 1710 | self.traverse(k) 1711 | self.write(": ") 1712 | self.traverse(p) 1713 | 1714 | with self.delimit("{", "}"): 1715 | keys = node.keys 1716 | self.interleave( 1717 | lambda: self.write(", "), 1718 | write_key_pattern_pair, 1719 | zip(keys, node.patterns, strict=True), 1720 | ) 1721 | rest = node.rest 1722 | if rest is not None: 1723 | if keys: 1724 | self.write(", ") 1725 | self.write(f"**{rest}") 1726 | 1727 | def visit_MatchClass(self, node): 1728 | self.set_precedence(_Precedence.ATOM, node.cls) 1729 | self.traverse(node.cls) 1730 | with self.delimit("(", ")"): 1731 | patterns = node.patterns 1732 | self.interleave( 1733 | lambda: self.write(", "), self.traverse, patterns 1734 | ) 1735 | attrs = node.kwd_attrs 1736 | if attrs: 1737 | def write_attr_pattern(pair): 1738 | attr, 
pattern = pair 1739 | self.write(f"{attr}=") 1740 | self.traverse(pattern) 1741 | 1742 | if patterns: 1743 | self.write(", ") 1744 | self.interleave( 1745 | lambda: self.write(", "), 1746 | write_attr_pattern, 1747 | zip(attrs, node.kwd_patterns, strict=True), 1748 | ) 1749 | 1750 | def visit_MatchAs(self, node): 1751 | name = node.name 1752 | pattern = node.pattern 1753 | if name is None: 1754 | self.write("_") 1755 | elif pattern is None: 1756 | self.write(node.name) 1757 | else: 1758 | with self.require_parens(_Precedence.TEST, node): 1759 | self.set_precedence(_Precedence.BOR, node.pattern) 1760 | self.traverse(node.pattern) 1761 | self.write(f" as {node.name}") 1762 | 1763 | def visit_MatchOr(self, node): 1764 | with self.require_parens(_Precedence.BOR, node): 1765 | self.set_precedence(_Precedence.BOR.next(), *node.patterns) 1766 | self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) 1767 | 1768 | def unparse(ast_obj): 1769 | unparser = _Unparser() 1770 | return unparser.visit(ast_obj) 1771 | 1772 | 1773 | _deprecated_globals = { 1774 | name: globals().pop(name) 1775 | for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis') 1776 | } 1777 | 1778 | def __getattr__(name): 1779 | if name in _deprecated_globals: 1780 | globals()[name] = value = _deprecated_globals[name] 1781 | import warnings 1782 | warnings._deprecated( 1783 | f"ast.{name}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) 1784 | ) 1785 | return value 1786 | raise AttributeError(f"module 'ast' has no attribute '{name}'") 1787 | 1788 | 1789 | def main(): 1790 | import argparse 1791 | 1792 | parser = argparse.ArgumentParser(prog='python -m ast') 1793 | parser.add_argument('infile', type=argparse.FileType(mode='rb'), nargs='?', 1794 | default='-', 1795 | help='the file to parse; defaults to stdin') 1796 | parser.add_argument('-m', '--mode', default='exec', 1797 | choices=('exec', 'single', 'eval', 'func_type'), 1798 | help='specify what kind of code must be parsed') 1799 | parser.add_argument('--no-type-comments', default=True, action='store_false', 1800 | help="don't add information about type comments") 1801 | parser.add_argument('-a', '--include-attributes', action='store_true', 1802 | help='include attributes such as line numbers and ' 1803 | 'column offsets') 1804 | parser.add_argument('-i', '--indent', type=int, default=3, 1805 | help='indentation of nodes (number of spaces)') 1806 | args = parser.parse_args() 1807 | 1808 | with args.infile as infile: 1809 | source = infile.read() 1810 | tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments) 1811 | print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) 1812 | 1813 | if __name__ == '__main__': 1814 | main() --------------------------------------------------------------------------------
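The `literal_eval` helper dumped above accepts only Python literal structures (strings, bytes, numbers, tuples, lists, dicts, sets, booleans, and None) and raises ValueError for anything else. A minimal sketch of that contract, exercised against the real standard-library ast module that this fixture mirrors; the evaluated strings are arbitrary illustrations:

import ast

# Literals, including nested containers and complex-number arithmetic like 3+4j,
# are evaluated safely.
print(ast.literal_eval("{'a': [1, 2.5, 3+4j], 'b': (True, None)}"))

# Names, calls, and other non-literal nodes are rejected.
try:
    ast.literal_eval("__import__('os').system('echo hi')")
except ValueError as exc:
    print(exc)  # malformed node or string on line 1: <ast.Call ...>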
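Likewise, the `dump` helper's indent parameter (Python 3.9+) switches from the default single-line representation to a pretty-printed tree; a small sketch:

import ast

# Pretty-print the AST of a one-line assignment; indent=None would keep it on one line.
print(ast.dump(ast.parse("x = 1"), indent=4))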
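The RewriteName example in the `NodeTransformer` docstring above runs end to end with `parse`, `fix_missing_locations`, and `unparse` (ast.unparse requires Python 3.9+); here `data`, `result`, `x`, and `y` are placeholder names:

import ast

class RewriteName(ast.NodeTransformer):
    def visit_Name(self, node):
        # Replace every name lookup `foo` with the subscript data['foo'],
        # carrying over the original node's source location.
        return ast.copy_location(
            ast.Subscript(
                value=ast.Name(id="data", ctx=ast.Load()),
                slice=ast.Constant(value=node.id),
                ctx=node.ctx,
            ),
            node,
        )

tree = ast.fix_missing_locations(RewriteName().visit(ast.parse("result = x + y")))
print(ast.unparse(tree))  # data['result'] = data['x'] + data['y']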
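Finally, `get_source_segment` (Python 3.8+) recovers the exact source slice behind a node with location information, which is what makes location-aware tooling possible; `compute`, `price`, and `tax_rate` are made-up identifiers:

import ast

source = "total = compute(price, tax_rate)\n"
call = ast.parse(source).body[0].value  # the compute(...) Call node
print(ast.get_source_segment(source, call))  # compute(price, tax_rate)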