├── tests ├── __init__.py ├── conftest.py └── subcommands │ ├── files │ ├── test_files.py │ ├── test_files_ls.py │ └── test_files_rm.py │ ├── test_prompts.py │ ├── test_check.py │ ├── test_drop.py │ ├── query │ ├── test_types.py │ └── test_reranker.py │ ├── test_clean.py │ ├── test_chunks.py │ ├── test_update.py │ └── test_ls.py ├── .vectorcode ├── vectorcode.exclude └── vectorcode.include ├── neovim.toml ├── images ├── sudoku_no_rag.png ├── sudoku_with_rag.png └── codecompanion_chat.png ├── selene.toml ├── stylua.toml ├── src └── vectorcode │ ├── __init__.py │ ├── subcommands │ ├── chunks.py │ ├── check.py │ ├── files │ │ ├── __init__.py │ │ ├── ls.py │ │ └── rm.py │ ├── __init__.py │ ├── query │ │ ├── reranker │ │ │ ├── naive.py │ │ │ ├── cross_encoder.py │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── types.py │ │ └── __init__.py │ ├── drop.py │ ├── clean.py │ ├── ls.py │ ├── prompt.py │ ├── update.py │ └── init.py │ ├── debugging.py │ └── main.py ├── lua ├── vectorcode │ ├── integrations │ │ ├── init.lua │ │ ├── codecompanion │ │ │ ├── prompts │ │ │ │ ├── presets.lua │ │ │ │ └── init.lua │ │ │ ├── init.lua │ │ │ ├── common.lua │ │ │ ├── ls_tool.lua │ │ │ ├── files_ls_tool.lua │ │ │ ├── files_rm_tool.lua │ │ │ └── vectorise_tool.lua │ │ ├── heirline.lua │ │ ├── lualine.lua │ │ └── copilotchat.lua │ ├── jobrunner │ │ ├── cmd.lua │ │ ├── init.lua │ │ └── lsp.lua │ ├── cacher │ │ ├── init.lua │ │ └── default.lua │ ├── utils.lua │ ├── config.lua │ ├── types.lua │ └── init.lua └── codecompanion │ └── _extensions │ └── vectorcode │ └── init.lua ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── enhancement.md │ └── bug_report.md ├── workflows │ ├── selene.yml │ ├── formatting.yaml │ ├── release.yml │ ├── test_and_cov.yml │ └── panvimdoc.yml └── FUNDING.yml ├── .pre-commit-config.yaml ├── Makefile ├── LICENSE ├── pyproject.toml ├── plugin └── vectorcode.lua ├── docs └── CONTRIBUTING.md ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vectorcode/vectorcode.exclude: -------------------------------------------------------------------------------- 1 | .vectorcode/ 2 | .github/ 3 | -------------------------------------------------------------------------------- /neovim.toml: -------------------------------------------------------------------------------- 1 | [selene] 2 | base = "lua51" 3 | name = "neovim" 4 | 5 | [vim] 6 | any = true 7 | -------------------------------------------------------------------------------- /images/sudoku_no_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/sudoku_no_rag.png -------------------------------------------------------------------------------- /images/sudoku_with_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/sudoku_with_rag.png -------------------------------------------------------------------------------- /images/codecompanion_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/codecompanion_chat.png -------------------------------------------------------------------------------- /.vectorcode/vectorcode.include: 
-------------------------------------------------------------------------------- 1 | lua/vectorcode/**/*.lua 2 | src/vectorcode/**/*.py 3 | plugin/vectorcode.lua 4 | docs/*.md 5 | tests/**/*.py 6 | -------------------------------------------------------------------------------- /selene.toml: -------------------------------------------------------------------------------- 1 | std = "neovim" 2 | exclude = ['lua/vectorcode/integrations/codecompanion/legacy_tool.lua'] 3 | 4 | [rules] 5 | mixed_table = "allow" 6 | -------------------------------------------------------------------------------- /stylua.toml: -------------------------------------------------------------------------------- 1 | indent_type = "Spaces" 2 | indent_width = 2 3 | column_width = 88 4 | quote_style = "AutoPreferDouble" 5 | no_call_parentheses = false 6 | -------------------------------------------------------------------------------- /src/vectorcode/__init__.py: -------------------------------------------------------------------------------- 1 | try: # pragma: no cover 2 | # this will be populated by pdm build backend when building. 3 | from vectorcode._version import __version__ 4 | except Exception: 5 | __version__ = "0.0.0" 6 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/init.lua: -------------------------------------------------------------------------------- 1 | return { 2 | codecompanion = require("vectorcode.integrations.codecompanion"), 3 | copilotchat = require("vectorcode.integrations.copilotchat"), 4 | lualine = require("vectorcode.integrations.lualine"), 5 | heirline = require("vectorcode.integrations.heirline"), 6 | } 7 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/prompts/presets.lua: -------------------------------------------------------------------------------- 1 | ---@type table 2 | local M = {} 3 | 4 | M["Neovim Tutor"] = { 5 | project_root = vim.fs.normalize(vim.env.VIMRUNTIME), 6 | file_patterns = { "lua/**/*.lua", "doc/**/*.txt" }, 7 | } 8 | 9 | return M 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from vectorcode.cli_utils import GLOBAL_CONFIG_DIR 4 | 5 | 6 | @pytest.fixture(autouse=True) 7 | def restore_global_config_path(): 8 | global GLOBAL_CONFIG_DIR 9 | original_global_config_path = GLOBAL_CONFIG_DIR 10 | yield 11 | GLOBAL_CONFIG_DIR = original_global_config_path 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Ask a question 4 | url: https://github.com/Davidyz/VectorCode/discussions/new?category=q-a 5 | about: If you're new to VectorCode and is having trouble setting it up, post in discussions first. We can convert it to an issue if something's indeed wrong with VectorCode. 
6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.9.1 4 | hooks: 5 | - id: ruff 6 | language: python 7 | - id: ruff 8 | language: python 9 | args: [ "check", "--fix", "--select", "I" ] 10 | - id: ruff-format 11 | language: python 12 | - repo: https://github.com/JohnnyMorganz/StyLua 13 | rev: v2.0.2 14 | hooks: 15 | - id: stylua-github 16 | language: lua 17 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/chunks.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from vectorcode.chunking import TreeSitterChunker 4 | from vectorcode.cli_utils import Config 5 | 6 | 7 | async def chunks(configs: Config) -> int: 8 | chunker = TreeSitterChunker(configs) 9 | result = [] 10 | for file_path in configs.files: 11 | result.append(list(i.export_dict() for i in chunker.chunk(str(file_path)))) 12 | print(json.dumps((result))) 13 | return 0 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: Discuss what new features can be added, or existing features improved. 4 | title: "[FEAT]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature you want** 11 | 12 | > Tell me about the feature, ideally with a scenario where this feature would be 13 | useful. 14 | 15 | **Complimentary Material** 16 | 17 | > If there's any material that may help me implement/test this feature, please 18 | list them here. 19 | -------------------------------------------------------------------------------- /.github/workflows/selene.yml: -------------------------------------------------------------------------------- 1 | name: Selene check 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | paths: 8 | - "lua/**/*.lua" 9 | - "plugin/*.lua" 10 | pull_request: 11 | 12 | jobs: 13 | selene: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Run Selene check 18 | uses: NTBBloodbath/selene-action@v1.0.0 19 | with: 20 | token: ${{ secrets.GITHUB_TOKEN }} 21 | args: lua/ 22 | version: 0.28.0 23 | -------------------------------------------------------------------------------- /.github/workflows/formatting.yaml: -------------------------------------------------------------------------------- 1 | name: Style check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | style-check: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Ruff 18 | uses: astral-sh/ruff-action@v3 19 | 20 | - uses: JohnnyMorganz/stylua-action@v4 21 | with: 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | version: latest 24 | # CLI arguments 25 | args: --check . 
26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: multitest 2 | 3 | DEFAULT_GROUPS=--group dev --group lsp --group mcp --group debug 4 | 5 | deps: 6 | pdm lock $(DEFAULT_GROUPS) || pdm lock $(DEFAULT_GROUPS) --group legacy; \ 7 | pdm install 8 | 9 | test: 10 | make deps; \ 11 | pdm run pytest --enable-coredumpy --coredumpy-dir dumps 12 | 13 | multitest: 14 | @for i in {11..13}; do \ 15 | pdm use python3.$$i; \ 16 | make test; \ 17 | done 18 | 19 | coverage: 20 | make deps; \ 21 | pdm run coverage run -m pytest; \ 22 | pdm run coverage html; \ 23 | pdm run coverage report -m 24 | 25 | lint: 26 | pdm run ruff check src/**/*.py; \ 27 | pdm run basedpyright src/**/*.py; \ 28 | selene lua/**/*.lua plugin/*.lua 29 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | from vectorcode.cli_utils import CHECK_OPTIONS, Config, find_project_config_dir 5 | 6 | 7 | async def check(configs: Config) -> int: 8 | assert isinstance(configs.check_item, str) 9 | assert configs.check_item.lower() in CHECK_OPTIONS 10 | match configs.check_item: 11 | case "config": 12 | project_local_config = await find_project_config_dir(".") 13 | if project_local_config is None: 14 | print("Failed!", file=sys.stderr) 15 | return 1 16 | else: 17 | print(str(Path(project_local_config).parent), end="") 18 | return 0 19 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/init.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | return { 4 | chat = { 5 | ---@param subcommand sub_cmd 6 | ---@param opts VectorCode.CodeCompanion.ToolOpts 7 | ---@return CodeCompanion.Tools.Tool 8 | make_tool = function(subcommand, opts) 9 | local has = require("codecompanion").has 10 | if has ~= nil and has("function-calling") then 11 | return require( 12 | string.format("vectorcode.integrations.codecompanion.%s_tool", subcommand) 13 | )(opts) 14 | else 15 | error("Unsupported version of codecompanion!") 16 | end 17 | end, 18 | prompts = require("vectorcode.integrations.codecompanion.prompts"), 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from vectorcode.cli_utils import Config, FilesAction 4 | 5 | logger = logging.getLogger(name=__name__) 6 | 7 | 8 | async def files(configs: Config) -> int: 9 | match configs.files_action: 10 | case FilesAction.ls: 11 | from vectorcode.subcommands.files import ls 12 | 13 | return await ls.ls(configs) 14 | case FilesAction.rm: 15 | from vectorcode.subcommands.files import rm 16 | 17 | return await rm.rm(configs) 18 | case _: 19 | logger.error( 20 | f"Unsupported subcommand for `vectorcode files`: {configs.action}" 21 | ) 22 | return 1 23 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/__init__.py: -------------------------------------------------------------------------------- 1 | from vectorcode.subcommands.check import check 2 | from vectorcode.subcommands.chunks import 
chunks 3 | from vectorcode.subcommands.clean import clean 4 | from vectorcode.subcommands.drop import drop 5 | from vectorcode.subcommands.files import files 6 | from vectorcode.subcommands.init import init 7 | from vectorcode.subcommands.ls import ls 8 | from vectorcode.subcommands.prompt import prompts 9 | from vectorcode.subcommands.query import query 10 | from vectorcode.subcommands.update import update 11 | from vectorcode.subcommands.vectorise import vectorise 12 | 13 | __all__ = [ 14 | "check", 15 | "chunks", 16 | "clean", 17 | "drop", 18 | "files", 19 | "init", 20 | "ls", 21 | "prompts", 22 | "query", 23 | "update", 24 | "vectorise", 25 | ] 26 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.common import ClientManager, get_collection, list_collection_files 6 | 7 | logger = logging.getLogger(name=__name__) 8 | 9 | 10 | async def ls(configs: Config) -> int: 11 | async with ClientManager().get_client(configs=configs) as client: 12 | try: 13 | collection = await get_collection(client, configs, False) 14 | except ValueError: 15 | logger.error(f"There's no existing collection at {configs.project_root}.") 16 | return 1 17 | paths = await list_collection_files(collection) 18 | if configs.pipe: 19 | print(json.dumps(list(paths))) 20 | else: 21 | for p in paths: 22 | print(p) 23 | return 0 24 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/heirline.lua: -------------------------------------------------------------------------------- 1 | ---@class VectorCode.Heirline.Opts: VectorCode.Lualine.Opts 2 | --- Other heirline component fields (like `hl`, `on_click`, `update`, etc.) 3 | ---@field component_opts table 4 | 5 | ---@type VectorCode.Heirline.Opts 6 | local default_opts = { show_job_count = false, component_opts = {} } 7 | 8 | ---@param opts VectorCode.Heirline.Opts? 9 | return function(opts) 10 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) --[[@as VectorCode.Heirline.Opts]] 11 | local lualine_comp = require("vectorcode.integrations").lualine(opts) 12 | local heirline_component = { 13 | provider = function(_) 14 | return lualine_comp[1]() 15 | end, 16 | condition = function(_) 17 | return lualine_comp.cond() 18 | end, 19 | } 20 | 21 | return vim.tbl_deep_extend("force", heirline_component, opts.component_opts) 22 | end 23 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/naive.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | from .base import RerankerBase 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | class NaiveReranker(RerankerBase): 13 | """This reranker uses the distances between the embedding vectors in the database for the queries and the chunks as the measure of relevance. 14 | No special configs required. 15 | configs.reranker_params will be ignored. 
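When no reranker is specified in the user configs, VectorCode falls back to this reranker by default.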
16 | """ 17 | 18 | def __init__(self, configs: Config, **kwargs: Any): 19 | super().__init__(configs) 20 | 21 | async def compute_similarity(self, results: list[QueryResult]): 22 | """ 23 | Do nothing, because the QueryResult objects already contain distances. 24 | """ 25 | pass 26 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: davidyz # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/drop.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from chromadb.errors import InvalidCollectionException 4 | 5 | from vectorcode.cli_utils import Config 6 | from vectorcode.common import ClientManager, get_collection 7 | 8 | logger = logging.getLogger(name=__name__) 9 | 10 | 11 | async def drop(config: Config) -> int: 12 | async with ClientManager().get_client(config) as client: 13 | try: 14 | collection = await get_collection(client, config) 15 | collection_path = collection.metadata["path"] 16 | await client.delete_collection(collection.name) 17 | print(f"Collection for {collection_path} has been deleted.") 18 | logger.info(f"Deteted collection at {collection_path}.") 19 | return 0 20 | except (ValueError, InvalidCollectionException) as e: 21 | logger.error( 22 | f"{e.__class__.__name__}: There's no existing collection for {config.project_root}" 23 | ) 24 | return 1 25 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/clean.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from chromadb.api import AsyncClientAPI 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.common import ClientManager, get_collections 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | async def run_clean_on_client(client: AsyncClientAPI, pipe_mode: bool): 13 | async for collection in get_collections(client): 14 | meta = collection.metadata 15 | logger.debug(f"{meta.get('path')}: {await collection.count()} chunk(s)") 16 | if await collection.count() == 0 or not os.path.isdir(meta["path"]): 17 | await client.delete_collection(collection.name) 18 | logger.info(f"Deleted collection for {meta['path']}") 19 | if not pipe_mode: 20 | print(f"Deleted {meta['path']}.") 21 | 22 | 23 | async def clean(configs: Config) -> int: 24 | async with 
ClientManager().get_client(configs) as client: 25 | await run_clean_on_client(client, configs.pipe) 26 | return 0 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 David 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/subcommands/files/test_files.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | 5 | from vectorcode.cli_utils import CliAction, Config, FilesAction 6 | from vectorcode.subcommands.files import files 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_files(): 11 | with patch( 12 | "vectorcode.subcommands.files.ls.ls", return_value=AsyncMock() 13 | ) as mock_ls: 14 | config = Config(action=CliAction.files, files_action=FilesAction.ls) 15 | await files(config) 16 | mock_ls.assert_called_with(config) 17 | with patch( 18 | "vectorcode.subcommands.files.rm.rm", return_value=AsyncMock() 19 | ) as mock_rm: 20 | config = Config(action=CliAction.files, files_action=FilesAction.rm) 21 | await files(config) 22 | mock_rm.assert_called_with(config) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_files_invalid_actions(): 27 | with patch("vectorcode.subcommands.files.logger") as mock_logger: 28 | config = Config(action=CliAction.files, files_action="foobar") 29 | assert await files(config) != 0 30 | mock_logger.error.assert_called_once() 31 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/rm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import cast 4 | 5 | from chromadb.types import Where 6 | 7 | from vectorcode.cli_utils import Config, expand_path 8 | from vectorcode.common import ClientManager, get_collection 9 | 10 | logger = logging.getLogger(name=__name__) 11 | 12 | 13 | async def rm(configs: Config) -> int: 14 | async with ClientManager().get_client(configs=configs) as client: 15 | try: 16 | collection = await get_collection(client, configs, False) 17 | except ValueError: 18 | logger.error(f"There's no existing collection at {configs.project_root}.") 19 | return 1 20 | paths = list( 21 | str(expand_path(p, True)) for p in 
configs.rm_paths if os.path.isfile(p) 22 | ) 23 | await collection.delete(where=cast(Where, {"path": {"$in": paths}})) 24 | if not configs.pipe: 25 | print(f"Removed {len(paths)} file(s).") 26 | if await collection.count() == 0: 27 | logger.warning( 28 | f"The collection at {configs.project_root} is now empty and will be removed." 29 | ) 30 | await client.delete_collection(collection.name) 31 | return 0 32 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/lualine.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | 3 | ---@class VectorCode.Lualine.Opts 4 | ---Whether to show the number of running async jobs. 5 | ---@field show_job_count boolean 6 | 7 | ---@param opts VectorCode.Lualine.Opts? 8 | return function(opts) 9 | opts = vim.tbl_deep_extend("force", { show_job_count = false }, opts or {}) --[[@as VectorCode.Lualine.Opts]] 10 | local cacher = vc_config.get_cacher_backend() 11 | return { 12 | function() 13 | local message = "VectorCode: " 14 | if cacher.buf_is_enabled(0) then 15 | local retrieval = cacher.query_from_cache(0, { notify = false }) 16 | if retrieval then 17 | message = message .. tostring(#retrieval) 18 | end 19 | local job_count = cacher.buf_job_count(0) 20 | if job_count > 0 then 21 | if opts.show_job_count then 22 | message = message .. (" (%d) "):format(job_count) 23 | else 24 | message = message .. "  " 25 | end 26 | else 27 | message = message .. "  " 28 | end 29 | else 30 | message = message .. " " 31 | end 32 | return message 33 | end, 34 | cond = function() 35 | return cacher.buf_is_registered() 36 | end, 37 | } 38 | end 39 | -------------------------------------------------------------------------------- /tests/subcommands/test_prompts.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import sys 4 | 5 | from vectorcode.cli_utils import Config, PromptCategory 6 | from vectorcode.subcommands import prompt 7 | 8 | 9 | def test_prompts_pipe_true(): 10 | configs = Config(pipe=True, prompt_categories=PromptCategory) 11 | 12 | # Mock stdout 13 | captured_output = io.StringIO() 14 | sys.stdout = captured_output 15 | 16 | return_code = prompt.prompts(configs) 17 | 18 | sys.stdout = sys.__stdout__ # Reset stdout 19 | 20 | expected_output = ( 21 | json.dumps(sorted(sum(prompt.prompt_by_categories.values(), start=[]))) + "\n" 22 | ) 23 | assert captured_output.getvalue() == expected_output 24 | assert return_code == 0 25 | 26 | 27 | def test_prompts_pipe_false(): 28 | configs = Config(pipe=False, prompt_categories=PromptCategory) 29 | 30 | # Mock stdout 31 | captured_output = io.StringIO() 32 | sys.stdout = captured_output 33 | 34 | return_code = prompt.prompts(configs) 35 | 36 | sys.stdout = sys.__stdout__ # Reset stdout 37 | 38 | expected_output = "" 39 | for i in sorted(sum(prompt.prompt_by_categories.values(), start=[])): 40 | expected_output += f"- {i}\n" 41 | 42 | assert captured_output.getvalue() == expected_output 43 | assert return_code == 0 44 | -------------------------------------------------------------------------------- /tests/subcommands/test_check.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from vectorcode.cli_utils import CHECK_OPTIONS, Config 6 | from vectorcode.subcommands import check 7 | 8 | 9 | @pytest.mark.asyncio 10 | async 
def test_check_config_success(capsys, tmp_path): 11 | # Create a temporary .vectorcode directory 12 | project_root = tmp_path / ".vectorcode" 13 | project_root.mkdir() 14 | 15 | config = Config(check_item="config") 16 | 17 | with patch("os.getcwd", return_value=str(tmp_path)): 18 | result = await check(config) 19 | captured = capsys.readouterr() 20 | 21 | assert result == 0 22 | assert str(tmp_path) == captured.out 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_check_config_failure(capsys, tmp_path): 27 | # Ensure no .vectorcode directory exists 28 | config = Config(check_item="config") 29 | with patch("os.getcwd", return_value=str(tmp_path)): 30 | result = await check(config) 31 | captured = capsys.readouterr() 32 | 33 | assert result == 1 34 | assert "Failed!" in captured.err 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_check_invalid_check_item(): 39 | config = Config(check_item="invalid_item") 40 | with pytest.raises(AssertionError): 41 | await check(config) 42 | 43 | 44 | def test_check_options(): 45 | assert "config" in CHECK_OPTIONS 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. ... 16 | 2. ... 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **VectorCode Configuration** 22 | Please attach your `/.vectorcode/config.json` or 23 | `~/.config/vectorcode/config.json` here. 24 | ```json 25 | 26 | ``` 27 | 28 | For issues with the Neovim plugin, please also attach your `setup` options: 29 | ```lua 30 | 31 | ``` 32 | If it only occurs when you use VectorCode with a particular plugin, please 33 | attach the relevant config here: 34 | ```lua 35 | 36 | ``` 37 | 38 | **Platform information:** 39 | - If the issue is about the CLI, attach a list of packages in the Python virtual environment: 40 | - for `pipx`, run `pipx runpip vectorcode freeze`; 41 | - for `uv`, run `uv tool run --from=vectorcode python -m ensurepip && uv tool run --from=vectorcode python -m pip freeze`. 42 | ``` 43 | 44 | ``` 45 | - If the issue is about the neovim plugin, attach the neovim version you're using: 46 | 47 | 48 | **System Information:** 49 | 50 | > For Mac users, please also mention whether you're using intel or apple silicon devices. 51 | 52 | - OS: Linux, MacOS, Windows... 53 | 54 | **Additional context** 55 | Add any other context about the problem here. Please attach 56 | [CLI logs](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 57 | or 58 | [nvim plugin logs](https://github.com/Davidyz/VectorCode/blob/main/docs/neovim.md#debugging-and-logging) 59 | if applicable. 
60 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/cross_encoder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | from .base import RerankerBase 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | class CrossEncoderReranker(RerankerBase): 13 | """This reranker uses [`CrossEncoder` from the sentence_transformers library](https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html) for reranking. 14 | Parameters in configs.params will be passed to the `CrossEncoder` class in the `sentence_transformers` library. 15 | The default model is 'cross-encoder/ms-marco-MiniLM-L-6-v2'. 16 | Consult sentence_transformers documentation for details on the available parameters. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | configs: Config, 22 | **kwargs: Any, 23 | ): 24 | super().__init__(configs) 25 | from sentence_transformers import CrossEncoder 26 | 27 | if configs.reranker_params.get("model_name_or_path") is None: 28 | logger.warning( 29 | "'model_name_or_path' is not set. Fallback to 'cross-encoder/ms-marco-MiniLM-L-6-v2'" 30 | ) 31 | configs.reranker_params["model_name_or_path"] = ( 32 | "cross-encoder/ms-marco-MiniLM-L-6-v2" 33 | ) 34 | model_name = configs.reranker_params.pop("model_name_or_path") 35 | self.model = CrossEncoder(model_name, **configs.reranker_params) 36 | 37 | async def compute_similarity(self, results: list[QueryResult]): 38 | scores = self.model.predict([(str(res.chunk), res.query[0]) for res in results]) 39 | 40 | for res, score in zip(results, scores): 41 | res.scores = (score,) 42 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/cmd.lua: -------------------------------------------------------------------------------- 1 | ---@type VectorCode.JobRunner 2 | local runner = {} 3 | 4 | ---@type table 5 | local jobs = {} 6 | local logger = require("vectorcode.config").logger 7 | 8 | function runner.run_async(args, callback, bufnr) 9 | if type(callback) == "function" then 10 | callback = vim.schedule_wrap(callback) 11 | else 12 | callback = nil 13 | end 14 | logger.debug( 15 | ("cmd jobrunner for buffer %s args: %s"):format(bufnr, vim.inspect(args)) 16 | ) 17 | 18 | table.insert( 19 | args, 20 | 1, 21 | require("vectorcode.config").get_user_config().cli_cmds.vectorcode 22 | ) 23 | 24 | ---@type vim.SystemObj? 
25 | local job 26 | job = vim.system(args, {}, function(out) 27 | if job and job.pid then 28 | jobs[job.pid] = job 29 | end 30 | local stdout = out.stdout or "{}" 31 | if stdout == "" then 32 | stdout = "{}" 33 | end 34 | local _, decoded = pcall(vim.json.decode, stdout, { object = true, array = true }) 35 | if type(callback) == "function" then 36 | callback(decoded or {}, out.stderr, out.code, out.signal) 37 | end 38 | end) 39 | jobs[job.pid] = job 40 | return tonumber(job.pid) 41 | end 42 | 43 | function runner.run(args, timeout_ms, bufnr) 44 | if timeout_ms == nil or timeout_ms < 0 then 45 | timeout_ms = 2 ^ 31 - 1 46 | end 47 | local res, err, code, signal 48 | local pid = runner.run_async(args, function(result, error, e_code, s) 49 | res = result 50 | err = error 51 | code = e_code 52 | signal = s 53 | end, bufnr) 54 | if pid ~= nil and jobs[pid] ~= nil then 55 | jobs[pid]:wait(timeout_ms) 56 | end 57 | return res or {}, err, code, signal 58 | end 59 | 60 | function runner.is_job_running(job) 61 | return jobs[job] ~= nil 62 | end 63 | 64 | function runner.stop_job(job_handle) 65 | local job = jobs[job_handle] 66 | if job ~= nil then 67 | job:kill(15) 68 | end 69 | end 70 | 71 | return runner 72 | -------------------------------------------------------------------------------- /lua/vectorcode/cacher/init.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | local jobrunner = require("vectorcode.jobrunner.cmd") 3 | 4 | return { 5 | lsp = require("vectorcode.cacher.lsp"), 6 | default = require("vectorcode.cacher.default"), 7 | utils = { 8 | ---Checks if VectorCode has been configured properly for your project. 9 | ---See the CLI manual for details. 10 | ---@param check_item string? 11 | ---@param on_success fun(out: vim.SystemCompleted)? 12 | ---@param on_failure fun(out: vim.SystemCompleted?)? 13 | async_check = function(check_item, on_success, on_failure) 14 | if not vc_config.has_cli() then 15 | if on_failure ~= nil then 16 | on_failure() 17 | end 18 | return 19 | end 20 | check_item = check_item or "config" 21 | jobrunner.run_async( 22 | { "check", check_item }, 23 | function(result, _error, code, signal) 24 | local out_msg = nil 25 | if type(result) == "table" and #result > 0 then 26 | out_msg = table.concat(vim.iter(result):flatten(math.huge):totable()) 27 | elseif type(result) == "string" then 28 | out_msg = result 29 | end 30 | 31 | local err_msg = nil 32 | if type(_error) == "table" and #_error > 0 then 33 | err_msg = table.concat(vim.iter(_error):flatten(math.huge):totable()) 34 | elseif type(_error) == "string" then 35 | out_msg = _error 36 | end 37 | 38 | local out = { 39 | stdout = out_msg, 40 | stderr = err_msg, 41 | code = code, 42 | signal = signal, 43 | } 44 | if out.code == 0 and type(on_success) == "function" then 45 | vim.schedule_wrap(on_success)(out) 46 | elseif out.code ~= 0 and type(on_failure) == "function" then 47 | vim.schedule_wrap(on_failure)(out) 48 | end 49 | end, 50 | 0 51 | ) 52 | end, 53 | }, 54 | } 55 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - '[0-9]+.[0-9]+.[0-9]+' 5 | 6 | jobs: 7 | pypi-publish: 8 | name: upload release to PyPI 9 | runs-on: ubuntu-latest 10 | permissions: 11 | # This permission is needed for private repositories. 
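# It is also required by the release step below, which creates the GitHub release and uploads the built artifacts.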
12 | contents: write 13 | # IMPORTANT: this permission is mandatory for trusted publishing 14 | id-token: write 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - uses: pdm-project/setup-pdm@v4 19 | 20 | - name: Publish package distributions to PyPI 21 | run: pdm publish 22 | 23 | - name: Generate Changelog 24 | id: changelog 25 | uses: mikepenz/release-changelog-builder-action@v5 26 | with: 27 | mode: "PR" 28 | configurationJson: | 29 | { 30 | "categories": [ 31 | { 32 | "title": "## 💥 Breaking Change", 33 | "labels": ["breaking"] 34 | }, 35 | { 36 | "title": "## 🚀 Features", 37 | "labels": ["feature", "enhancement"] 38 | }, 39 | { 40 | "title": "## 🐛 Fixes", 41 | "labels": ["fix", "bug"] 42 | }, 43 | { 44 | "title": "## 🧪 Tests", 45 | "labels": ["test"] 46 | }, 47 | { 48 | "title": "## 📖 Documentation", 49 | "labels": ["documentation"] 50 | } 51 | ], 52 | } 53 | 54 | token: ${{ secrets.GITHUB_TOKEN }} 55 | 56 | - name: Create Release 57 | uses: ncipollo/release-action@v1.16.0 58 | with: 59 | draft: false 60 | makeLatest: true 61 | name: Release ${{ github.ref_name }} 62 | body: ${{ steps.changelog.outputs.changelog }} 63 | token: ${{ secrets.GITHUB_TOKEN }} 64 | artifacts: "./dist/*" 65 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/common.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local job_runner 4 | local vc_config = require("vectorcode.config") 5 | local notify_opts = vc_config.notify_opts 6 | local logger = vc_config.logger 7 | 8 | local TOOL_RESULT_SOURCE = "VectorCodeToolResult" 9 | 10 | return { 11 | tool_result_source = TOOL_RESULT_SOURCE, 12 | 13 | ---@param t table|string|nil 14 | ---@return string 15 | flatten_table_to_string = function(t) 16 | vim.deprecate( 17 | "vectorcode.integrations.codecompanion.common.flatten_table_to_string", 18 | "vectorcode.utils.flatten_table_to_string", 19 | "1.0.0", 20 | "vectorcode", 21 | true 22 | ) 23 | return require("vectorcode.utils").flatten_table_to_string(t) 24 | end, 25 | 26 | ---@param use_lsp boolean 27 | ---@return VectorCode.JobRunner 28 | initialise_runner = function(use_lsp) 29 | if job_runner == nil then 30 | if use_lsp then 31 | job_runner = require("vectorcode.jobrunner.lsp") 32 | end 33 | if job_runner == nil then 34 | job_runner = require("vectorcode.jobrunner.cmd") 35 | logger.info("Using cmd runner for CodeCompanion tool.") 36 | if use_lsp then 37 | vim.schedule_wrap(vim.notify)( 38 | "Failed to initialise the LSP runner. Falling back to cmd runner.", 39 | vim.log.levels.WARN, 40 | notify_opts 41 | ) 42 | end 43 | else 44 | logger.info("Using LSP runner for CodeCompanion tool.") 45 | end 46 | end 47 | return job_runner 48 | end, 49 | 50 | ---Convert `path` to a relative path if it's within the current project. 51 | ---When `base` is `nil`, this function will attempt to find a project root 52 | ---or use `cwd`. 53 | ---@param path string 54 | ---@param base? string 55 | ---@return string 56 | cleanup_path = function(path, base) 57 | base = base or vim.fs.root(0, { ".vectorcode", ".git" }) or vim.uv.cwd() or "." 
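    -- vim.fs.relpath() returns nil when `path` is not inside `base`, so such paths are returned unchanged.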
58 | return vim.fs.relpath(base, path) or path 59 | end, 60 | } 61 | -------------------------------------------------------------------------------- /src/vectorcode/debugging.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import cProfile 3 | import logging 4 | import os 5 | import pstats 6 | from datetime import datetime 7 | 8 | __LOG_DIR = os.path.expanduser("~/.local/share/vectorcode/logs/") 9 | 10 | logger = logging.getLogger(name=__name__) 11 | 12 | __profiler: cProfile.Profile | None = None 13 | 14 | 15 | def _ensure_log_dir(): 16 | """Ensure the log directory exists""" 17 | os.makedirs(__LOG_DIR, exist_ok=True) 18 | 19 | 20 | def finish(): 21 | """Clean up profiling and save results""" 22 | if __profiler is not None: 23 | try: 24 | __profiler.disable() 25 | stats_file = os.path.join( 26 | __LOG_DIR, 27 | f"cprofile-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.stats", 28 | ) 29 | __profiler.dump_stats(stats_file) 30 | print(f"cProfile stats saved to: {stats_file}") 31 | 32 | # Print summary stats 33 | stats = pstats.Stats(__profiler) 34 | stats.sort_stats("cumulative") 35 | stats.print_stats(20) 36 | except Exception as e: 37 | logger.warning(f"Failed to save cProfile output: {e}") 38 | 39 | 40 | def enable(): 41 | """Enable cProfile-based profiling and crash debugging""" 42 | global __profiler 43 | 44 | try: 45 | _ensure_log_dir() 46 | 47 | # Initialize cProfile for comprehensive profiling 48 | __profiler = cProfile.Profile() 49 | __profiler.enable() 50 | atexit.register(finish) 51 | logger.info("cProfile profiling enabled successfully") 52 | 53 | try: 54 | import coredumpy # noqa: F401 55 | 56 | logger.info("coredumpy crash debugging enabled successfully") 57 | coredumpy.patch_except(directory=__LOG_DIR) 58 | except Exception as e: 59 | logger.warning( 60 | f"Crash debugging will not be available. 
Failed to import coredumpy: {e}" 61 | ) 62 | 63 | except Exception as e: 64 | logger.error(f"Failed to initialize cProfile: {e}") 65 | logger.warning("Profiling will not be available for this session") 66 | -------------------------------------------------------------------------------- /tests/subcommands/test_drop.py: -------------------------------------------------------------------------------- 1 | from contextlib import asynccontextmanager 2 | from unittest.mock import AsyncMock, patch 3 | 4 | import pytest 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.subcommands.drop import drop 8 | 9 | 10 | @pytest.fixture 11 | def mock_config(): 12 | config = Config( 13 | project_root="/path/to/project", 14 | ) # Removed positional args 15 | return config 16 | 17 | 18 | @pytest.fixture 19 | def mock_client(): 20 | return AsyncMock() 21 | 22 | 23 | @pytest.fixture 24 | def mock_collection(): 25 | collection = AsyncMock() 26 | collection.name = "test_collection" 27 | collection.metadata = {"path": "/path/to/project"} 28 | return collection 29 | 30 | 31 | @pytest.mark.asyncio 32 | async def test_drop_success(mock_config, mock_client, mock_collection): 33 | mock_client.get_collection.return_value = mock_collection 34 | mock_client.delete_collection = AsyncMock() 35 | with ( 36 | patch("vectorcode.subcommands.drop.ClientManager") as MockClientManager, 37 | patch( 38 | "vectorcode.subcommands.drop.get_collection", return_value=mock_collection 39 | ), 40 | ): 41 | mock_client = AsyncMock() 42 | 43 | @asynccontextmanager 44 | async def _get_client(self, config=None, need_lock=True): 45 | yield mock_client 46 | 47 | mock_client_manager = MockClientManager.return_value 48 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 49 | mock_client_manager.get_client = _get_client 50 | 51 | result = await drop(mock_config) 52 | assert result == 0 53 | mock_client.delete_collection.assert_called_once_with(mock_collection.name) 54 | 55 | 56 | @pytest.mark.asyncio 57 | async def test_drop_collection_not_found(mock_config, mock_client): 58 | mock_client.get_collection.side_effect = ValueError("Collection not found") 59 | with patch("vectorcode.subcommands.drop.ClientManager"): 60 | with patch( 61 | "vectorcode.subcommands.drop.get_collection", 62 | side_effect=ValueError("Collection not found"), 63 | ): 64 | result = await drop(mock_config) 65 | assert result == 1 66 | -------------------------------------------------------------------------------- /.github/workflows/test_and_cov.yml: -------------------------------------------------------------------------------- 1 | name: Test and Coverage 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | python-version: [3.11, 3.12, 3.13] 16 | os: [ubuntu-latest, macos-latest, macos-14, ubuntu-24.04-arm] 17 | exclude: 18 | - os: macos-14 19 | python-version: 3.13 20 | runs-on: ${{ matrix.os }} 21 | timeout-minutes: 60 22 | env: 23 | COREDUMPY_DUMP_DIR: ${{ github.workspace }}/coredumpy_data 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: setup pdm 28 | uses: pdm-project/setup-pdm@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Install uv 33 | uses: astral-sh/setup-uv@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | 37 | - name: Configure pdm to use uv 38 | run: pdm config use_uv true 39 | 40 | - name: install pdm and dependencies 41 | run: make deps 42 | 43 | - name: Set 
custom HF cache directory 44 | run: | 45 | export HF_HOME="$GITHUB_WORKSPACE/hf_cache" 46 | export SENTENCE_TRANSFORMERS_HOME="$HF_HOME" 47 | mkdir -p "$HF_HOME" 48 | [ -z "$(ls "$HF_HOME")" ] || rm "${HF_HOME:?}/*" -rf && true 49 | 50 | - name: run tests 51 | run: pdm run pytest --enable-coredumpy --coredumpy-dir ${{ env.COREDUMPY_DUMP_DIR }} 52 | 53 | - name: run coverage 54 | run: | 55 | pdm run coverage run -m pytest 56 | pdm run coverage report -m 57 | pdm run coverage xml -i 58 | 59 | - name: static analysis by basedpyright 60 | run: pdm run basedpyright 61 | 62 | - name: upload coverage reports to codecov 63 | uses: codecov/codecov-action@v5 64 | with: 65 | token: ${{ secrets.codecov_token }} 66 | 67 | - name: Upload coredumpy data if applicable 68 | uses: gaogaotiantian/upload-coredumpy@v0.2 69 | if: failure() 70 | with: 71 | name: coredumpy_data_${{ matrix.os }}_${{ matrix.python-version }} 72 | path: ${{ env.COREDUMPY_DUMP_DIR }} 73 | retention-days: 7 74 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from typing import Type 4 | 5 | from vectorcode.cli_utils import Config 6 | 7 | from .base import RerankerBase 8 | from .cross_encoder import CrossEncoderReranker 9 | from .naive import NaiveReranker 10 | 11 | __all__ = ["RerankerBase", "NaiveReranker", "CrossEncoderReranker"] 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | __supported_rerankers: dict[str, Type[RerankerBase]] = { 16 | "CrossEncoderReranker": CrossEncoderReranker, 17 | "NaiveReranker": NaiveReranker, 18 | } 19 | 20 | 21 | class RerankerError(Exception): 22 | pass 23 | 24 | 25 | class RerankerInitialisationError(RerankerError): 26 | pass 27 | 28 | 29 | def add_reranker(cls): 30 | """ 31 | This is a class decorator that allows you to add a custom reranker that can be 32 | recognised by the `get_reranker` function. 33 | 34 | Your reranker should inherit `RerankerBase` and be decorated by `add_reranker`: 35 | ```python 36 | @add_reranker 37 | class CustomReranker(RerankerBase): 38 | # override the methods according to your need. 39 | ``` 40 | """ 41 | if issubclass(cls, RerankerBase): 42 | if __supported_rerankers.get(cls.__name__): 43 | error_message = f"{cls.__name__} has been registered." 
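        # A duplicate class name is rejected here instead of silently overwriting the existing registration.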
44 | raise AttributeError(error_message) 45 | __supported_rerankers[cls.__name__] = cls 46 | return cls 47 | else: 48 | error_message = f'{cls} should be a subclass of "RerankerBase"' 49 | raise TypeError(error_message) 50 | 51 | 52 | def get_available_rerankers(): 53 | return list(__supported_rerankers.values()) 54 | 55 | 56 | def get_reranker(configs: Config) -> RerankerBase: 57 | if configs.reranker: 58 | if hasattr(sys.modules[__name__], configs.reranker): 59 | # dynamic dispatch for built-in rerankers 60 | return getattr(sys.modules[__name__], configs.reranker).create(configs) 61 | 62 | elif issubclass( 63 | __supported_rerankers.get(configs.reranker, type(None)), RerankerBase 64 | ): 65 | return __supported_rerankers[configs.reranker].create(configs) 66 | 67 | if not configs.reranker: 68 | return NaiveReranker(configs) 69 | else: 70 | raise RerankerInitialisationError() 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "VectorCode" 3 | dynamic = ["version"] 4 | description = "A tool to vectorise repositories for RAG." 5 | authors = [{ name = "Davidyz", email = "hzjlyz@gmail.com" }] 6 | dependencies = [ 7 | "chromadb<=0.6.3", 8 | "sentence-transformers", 9 | "pathspec", 10 | "tabulate", 11 | "shtab", 12 | "numpy", 13 | "psutil", 14 | "httpx", 15 | "tree-sitter!=0.25.0", 16 | "tree-sitter-language-pack", 17 | "pygments", 18 | "transformers>=4.36.0,!=4.51.0,!=4.51.1,!=4.51.2", 19 | "wheel<0.46.0", 20 | "colorlog", 21 | "charset-normalizer>=3.4.1", 22 | "json5", 23 | "posthog<6.0.0", 24 | "filelock>=3.15.0", 25 | ] 26 | requires-python = ">=3.11,<3.14" 27 | readme = "README.md" 28 | license = { text = "MIT" } 29 | 30 | [project.urls] 31 | homepage = "https://github.com/Davidyz/VectorCode" 32 | github = "https://github.com/Davidyz/VectorCode" 33 | documentation = "https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md" 34 | 35 | [project.scripts] 36 | vectorcode = "vectorcode.main:main" 37 | vectorcode-server = "vectorcode.lsp_main:main" 38 | vectorcode-mcp-server = "vectorcode.mcp_main:main" 39 | 40 | [build-system] 41 | requires = ["pdm-backend"] 42 | build-backend = "pdm.backend" 43 | 44 | [tool.coverage.run] 45 | omit = [ 46 | "./tests/*", 47 | "src/vectorcode/_version.py", 48 | "src/vectorcode/__init__.py", 49 | "src/vectorcode/debugging.py", 50 | "/tmp/*", 51 | ] 52 | include = ['src/vectorcode/**/*.py'] 53 | 54 | 55 | [tool.pdm] 56 | distribution = true 57 | 58 | [tool.pdm.version] 59 | source = "scm" 60 | write_to = "./vectorcode/_version.py" 61 | write_template = "__version__ = '{}' # pragma: no cover" 62 | 63 | [dependency-groups] 64 | dev = [ 65 | "ipython>=8.31.0", 66 | "ruff>=0.9.1", 67 | "pre-commit>=4.0.1", 68 | "pytest>=8.3.4", 69 | "pdm-backend>=2.4.3", 70 | "coverage>=7.6.12", 71 | "pytest-asyncio>=0.25.3", 72 | "debugpy>=1.8.12", 73 | "basedpyright>=1.29.2", 74 | ] 75 | 76 | [project.optional-dependencies] 77 | legacy = ["numpy<2.0.0", "torch==2.2.2", "transformers<=4.49.0"] 78 | intel = ['optimum[openvino]', 'openvino'] 79 | lsp = ['pygls<2.0.0', 'lsprotocol'] 80 | mcp = ['mcp<2.0.0', 'pydantic'] 81 | debug = ["coredumpy>=0.4.1"] 82 | 83 | [tool.basedpyright] 84 | typeCheckingMode = "standard" 85 | ignore = ["./tests/"] 86 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/ls.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import socket 5 | 6 | import tabulate 7 | from chromadb.api import AsyncClientAPI 8 | from chromadb.api.types import IncludeEnum 9 | 10 | from vectorcode.cli_utils import Config, cleanup_path 11 | from vectorcode.common import ClientManager, get_collections 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | 16 | async def get_collection_list(client: AsyncClientAPI) -> list[dict]: 17 | result = [] 18 | async for collection in get_collections(client): 19 | meta = collection.metadata 20 | document_meta = await collection.get(include=[IncludeEnum.metadatas]) 21 | unique_files = set( 22 | i.get("path") for i in (document_meta["metadatas"] or []) if i is not None 23 | ) 24 | result.append( 25 | { 26 | "project-root": cleanup_path(meta["path"]), 27 | "user": meta.get("username"), 28 | "hostname": socket.gethostname(), 29 | "collection_name": collection.name, 30 | "size": await collection.count(), 31 | "embedding_function": meta["embedding_function"], 32 | "num_files": len(unique_files), 33 | } 34 | ) 35 | return result 36 | 37 | 38 | async def ls(configs: Config) -> int: 39 | async with ClientManager().get_client(configs) as client: 40 | result: list[dict] = await get_collection_list(client) 41 | logger.info(f"Found the following collections: {result}") 42 | 43 | if configs.pipe: 44 | print(json.dumps(result)) 45 | else: 46 | table = [] 47 | for meta in result: 48 | project_root = meta["project-root"] 49 | if os.environ.get("HOME"): 50 | project_root = project_root.replace(os.environ["HOME"], "~") 51 | row = [ 52 | project_root, 53 | meta["size"], 54 | meta["num_files"], 55 | meta["embedding_function"], 56 | ] 57 | table.append(row) 58 | print( 59 | tabulate.tabulate( 60 | table, 61 | headers=[ 62 | "Project Root", 63 | "Collection Size", 64 | "Number of Files", 65 | "Embedding Function", 66 | ], 67 | ) 68 | ) 69 | return 0 70 | -------------------------------------------------------------------------------- /plugin/vectorcode.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | local notify_opts = vc_config.notify_opts 3 | 4 | ---@param args string[]? 
5 | ---@return table 6 | local function process_args(args) 7 | if args == nil then 8 | return {} 9 | end 10 | local result = {} 11 | for _, str in pairs(args) do 12 | local equal = string.find(str, "=") 13 | if equal then 14 | local key = string.sub(str, 1, equal - 1) 15 | local value = string.sub(str, equal + 1) --[[@as any]] 16 | result[key] = value 17 | end 18 | end 19 | return result 20 | end 21 | 22 | vim.api.nvim_create_user_command("VectorCode", function(args) 23 | local cacher = vc_config.get_cacher_backend() 24 | local splitted_args = vim.tbl_filter(function(str) 25 | return str ~= nil and str ~= "" 26 | end, vim.split(args.args, " ")) 27 | local action = table.remove(splitted_args, 1) 28 | if action == "register" then 29 | local bufnr = vim.api.nvim_get_current_buf() 30 | cacher.register_buffer(bufnr, { 31 | run_on_register = true, 32 | project_root = process_args(splitted_args).project_root, 33 | }) 34 | vim.notify( 35 | ("Buffer %d has been registered for VectorCode."):format(bufnr), 36 | vim.log.levels.INFO, 37 | notify_opts 38 | ) 39 | elseif action == "deregister" then 40 | local buf_nr = vim.api.nvim_get_current_buf() 41 | cacher.deregister_buffer(buf_nr, { notify = true }) 42 | else 43 | vim.notify( 44 | ([[Command "VectorCode %s" was not recognised.]]):format(args.args), 45 | vim.log.levels.ERROR, 46 | notify_opts 47 | ) 48 | end 49 | end, { 50 | nargs = 1, 51 | complete = function(arglead, cmd, _) 52 | local cacher = vc_config.get_cacher_backend() 53 | local splitted_cmd = vim.tbl_filter(function(str) 54 | return str ~= nil and str ~= "" 55 | end, vim.split(cmd, " ")) 56 | 57 | if #splitted_cmd < 2 then 58 | if cacher.buf_is_registered(0) then 59 | return { "register", "deregister" } 60 | else 61 | return { "register" } 62 | end 63 | elseif #splitted_cmd == 2 and splitted_cmd[2] == "register" then 64 | return { "project_root=" } 65 | elseif splitted_cmd[2] == "register" and #splitted_cmd == 3 then 66 | local prefix = "project_root=" 67 | if string.find(splitted_cmd[3], prefix) == 1 then 68 | local partial = arglead:sub(#prefix + 1) 69 | local dirs = vim.fn.getcompletion(partial, "dir") 70 | for i = 1, #dirs do 71 | dirs[i] = prefix .. 
dirs[i] 72 | end 73 | return dirs 74 | end 75 | end 76 | end, 77 | }) 78 | -------------------------------------------------------------------------------- /tests/subcommands/query/test_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tree_sitter import Point 3 | 4 | from vectorcode.chunking import Chunk 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | 8 | def make_dummy_chunk(): 9 | return QueryResult( 10 | path="dummy1.py", 11 | chunk=Chunk( 12 | text="hello", start=Point(row=1, column=0), end=Point(row=1, column=4) 13 | ), 14 | query=["hello"], 15 | scores=[0.9], 16 | ) 17 | 18 | 19 | def test_QueryResult_merge(): 20 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 21 | res2.query = ["bye"] 22 | res2.scores = [0.1] 23 | 24 | merged = QueryResult.merge(res1, res2) 25 | assert merged.path == res1.path 26 | assert merged.chunk == res1.chunk 27 | assert merged.mean_score() == 0.5 28 | assert merged.query == ("hello", "bye") 29 | 30 | 31 | def test_QueryResult_merge_failed(): 32 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 33 | res2.path = "dummy2.py" 34 | with pytest.raises(ValueError): 35 | QueryResult.merge(res1, res2) 36 | 37 | 38 | def test_QueryResult_group_by_path(): 39 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 40 | res2.chunk = Chunk( 41 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 42 | ) 43 | res2.query = ["bye"] 44 | res2.scores = [0.1] 45 | 46 | grouped_dict = QueryResult.group(res1, res2) 47 | assert len(grouped_dict.keys()) == 1 48 | assert len(grouped_dict["dummy1.py"]) == 2 49 | 50 | 51 | def test_QueryResult_group_by_chunk(): 52 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 53 | res2.query = ["bye"] 54 | res2.scores = [0.1] 55 | 56 | grouped_dict = QueryResult.group(res1, res2, by="chunk") 57 | assert len(grouped_dict.keys()) == 1 58 | assert len(grouped_dict[res1.chunk]) == 2 59 | 60 | 61 | def test_QueryResult_group_top_k(): 62 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 63 | res2.chunk = Chunk( 64 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 65 | ) 66 | res2.query = ["bye"] 67 | res2.scores = [0.1] 68 | 69 | grouped_dict = QueryResult.group(res1, res2, top_k=1) 70 | assert len(grouped_dict.keys()) == 1 71 | assert len(grouped_dict["dummy1.py"]) == 1 72 | assert grouped_dict["dummy1.py"][0].query[0] == "hello" 73 | 74 | 75 | def test_QueryResult_lt(): 76 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 77 | res2.chunk = Chunk( 78 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 79 | ) 80 | res2.query = ["bye"] 81 | res2.scores = [0.1] 82 | assert res2 < res1 83 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Before You Start 2 | 3 | As much as I appreciate everyone who wish to contribute to this project, if 4 | you want to submit a new feature/refactoring that is more than a few lines, 5 | it'll be better if you open an issue or discussion before you start working. 6 | This allows us to thoroughly exchange ideas before people invest too much time, 7 | and will help me maintain the codebase in the long run. 8 | 9 | # Technical Stuff 10 | 11 | This project uses [pre-commit](https://pre-commit.com/) to perform some 12 | formatting and linting. 
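If you haven't used it before, pre-commit can typically be installed with `pipx install pre-commit` (or `pip install pre-commit`), and running `pre-commit install` once inside the repository sets up the git hooks so these checks run on every commit.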
If you're 13 | contributing to this project, having it on your system will help you write code 14 | that passes the CI. 15 | You can also see 16 | [.pre-commit-config.yaml](https://github.com/Davidyz/VectorCode/blob/main/.pre-commit-config.yaml) 17 | for a list of hooks enabled for the repo. 18 | 19 | ## Python CLI 20 | 21 | The development and publication of this tool are managed by 22 | [pdm](https://pdm-project.org/en/latest/). 23 | 24 | Once you've cloned and `cd`ed into the repo, run `make deps`. This will call 25 | some `pdm` commands to install development dependencies. Some of them are 26 | actually optional, but for convenience I decided to leave them here. This will 27 | include [pytest](https://docs.pytest.org/en/stable/), the testing framework, 28 | and [coverage.py](https://coverage.readthedocs.io/en/7.7.1/), the coverage 29 | report tool. If you're not familiar with pytest or coverage.py, you can run `make test` to 30 | run tests on all Python code, and `make coverage` to generate a coverage report. 31 | The tests and coverage report are also part of the CI configuration, but it might 32 | still help to run them locally before you open the PR. 33 | 34 | This project also runs static analysis with 35 | [basedpyright](https://docs.basedpyright.com). GitHub Actions will also run the 36 | check when a PR is submitted. Both this and `ruff check` are included 37 | in `make lint`. 38 | 39 | You may also find it helpful to 40 | [enable logging](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 41 | for the CLI when developing new features or working on fixes. 42 | 43 | ## Neovim Plugin 44 | 45 | At the moment, there isn't much to cover here. As long as the code is 46 | formatted (stylua) and appropriately type-annotated, you're good. I do have 47 | plans to write some tests, but before that happens, formatting and type 48 | annotations are the only things that you need to take special care of. 49 | 50 | The Lua codebase is linted by [selene](https://github.com/Kampfkarren/selene). 51 | You may run `make lint` or call `selene` from the CLI to lint the code. 52 | 53 | You may find it useful to 54 | [enable logging](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 55 | when you're poking around the codebase. 56 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/init.lua: -------------------------------------------------------------------------------- 1 | local utils = require("vectorcode.utils") 2 | 3 | ---@alias VectorCode.JobRunner.Callback fun(result: table|nil, error: table|nil, code:integer, signal: integer?) 4 | 5 | --- A class for calling vectorcode commands that aims to provide a unified API for both the LSP and command-line backends. 6 | --- Implementations exist for both direct command-line execution (`cmd.lua`) and LSP (`lsp.lua`). 7 | --- For the format of the `result`, see https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#for-developers 8 | ---@class VectorCode.JobRunner 9 | --- Runs a vectorcode command asynchronously. 10 | --- Executes the command specified by `args`. Upon completion, if `callback` is provided, 11 | --- it's invoked with the following arguments: 12 | --- - `result`: the JSON object of the command execution result. 13 | --- - `error`: error messages, if any. 14 | --- - `code`: exit code (or error code) for the process. 15 | --- - `signal`: _for cmd runner only_, the shell signal sent to the process.
16 | --- The `bufnr` is used for context, potentially to find the project root or attach LSP clients. 17 | --- Returns a job handle (e.g., PID or LSP request ID) or nil if the job couldn't be started. 18 | ---@field run_async fun(args: string[], callback:VectorCode.JobRunner.Callback?, bufnr: integer):(job_handle:integer?) 19 | --- Runs a vectorcode command synchronously, blocking until completion or timeout. 20 | --- Executes the command specified by `args`. Waits for up to `timeout_ms` milliseconds. 21 | --- The `bufnr` is used for context, potentially to find the project root or attach LSP clients. 22 | --- Returns the following objects: 23 | --- - `result`: the JSON object of the command execution result. 24 | --- - `error`: error messages, if any. 25 | --- - `code`: exit code (or error code) for the process. 26 | --- - `signal`: _for cmd runner only_, the shell signal sent to the process. 27 | ---@field run fun(args: string[], timeout_ms: integer?, bufnr: integer):(result:table|nil, error:table|nil, code:integer, signal: integer?) 28 | --- Checks if a job associated with the given handle is currently running. 29 | --- Returns true if the job is running, false otherwise. 30 | ---@field is_job_running fun(job_handle: integer):boolean 31 | --- Attempts to stop or cancel the job associated with the given handle. 32 | ---@field stop_job fun(job_handle: integer) 33 | --- Optional initialization function. Some runners (like LSP) might require an initialization step. 34 | ---@field init function? 35 | 36 | return { 37 | --- Automatically find project_root from buffer path if it's not already specified. 38 | ---@param args string[] 39 | ---@param bufnr integer 40 | ---@return string[] 41 | find_root = function(args, bufnr) 42 | if not vim.list_contains(args, "--project_root") then 43 | local find_root = utils.find_root(bufnr) 44 | if find_root then 45 | vim.list_extend(args, { "--project_root", find_root }) 46 | end 47 | end 48 | return args 49 | end, 50 | } 51 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/prompt.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from vectorcode.cli_utils import Config, PromptCategory 5 | 6 | logger = logging.getLogger(name=__name__) 7 | 8 | prompt_by_categories: dict[str | PromptCategory, list[str]] = { 9 | PromptCategory.query: [ 10 | "separate phrases into distinct keywords when appropriate", 11 | "If a class, type or function has been imported from another file, this tool may be able to find its source. 
Add the name of the imported symbol to the query", 12 | "When providing answers based on VectorCode results, try to give references such as paths to files and line ranges, unless you're told otherwise (but do not include the full source code context)", 13 | "Avoid retrieving one single file because the retrieval mechanism may not be very accurate", 14 | "If the query results do not contain the needed context, increase the file count so that the result will more likely contain the desired files", 15 | "If the returned paths are relative, they are relative to the root of the project directory", 16 | "Do not suggest edits to retrieved files that are outside of the current working directory, unless the user instructed otherwise", 17 | "When specifying the `project_root` parameter when making a query, make sure you run the `ls` tool first to retrieve a list of valid, indexed projects", 18 | "If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings", 19 | "Do not use exact query keywords that you have used in a previous tool call in the conversation, unless the user instructed otherwise, or with different count/project_root", 20 | "Include related keywords as the search query. For example, when querying for `function`, include `return value`, `parameter`, `arguments` and alike.", 21 | ], 22 | PromptCategory.ls: [ 23 | "Use `ls` tool to obtain a list of indexed projects that are available to be queried by the `query` command." 24 | ], 25 | PromptCategory.vectorise: [ 26 | "When vectorising the files, provide accurate and case-sensitive paths to the file" 27 | ], 28 | "general": [ 29 | "VectorCode is the name of this tool. Do not include it in the query unless the user explicitly asks", 30 | "**Use at your discretion** when you feel you don't have enough information about the repository or project", 31 | "**Don't escape** special characters", 32 | ], 33 | } 34 | prompt_strings = [] 35 | 36 | 37 | def prompts(configs: Config) -> int: 38 | results = prompt_by_categories["general"].copy() 39 | for item in sorted(set(configs.prompt_categories or [PromptCategory.query])): 40 | logger.info(f"Loading {len(prompt_by_categories[item])} prompts for {item}") 41 | results.extend(prompt_by_categories[item]) 42 | results.sort() 43 | if configs.pipe: 44 | print(json.dumps(results)) 45 | else: 46 | for i in results: 47 | print(f"- {i}") 48 | return 0 49 | -------------------------------------------------------------------------------- /tests/subcommands/test_clean.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api import AsyncClientAPI 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.subcommands.clean import clean, run_clean_on_client 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_run_clean_on_client(): 12 | mock_client = AsyncMock(spec=AsyncClientAPI) 13 | mock_collection1 = AsyncMock() 14 | mock_collection1.name = "test_collection_1" 15 | mock_collection1.metadata = {"path": "/test/path1"} 16 | mock_collection1.count.return_value = 0 # Empty collection 17 | mock_collection2 = AsyncMock() 18 | mock_collection2.name = "test_collection_2" 19 | mock_collection2.metadata = {"path": "/test/path2"} 20 | mock_collection2.count.return_value = 1 # Non-empty collection 21 | 22 | async def mock_get_collections(client): 23 | yield mock_collection1 24 | yield mock_collection2 
25 | 26 | with ( 27 | patch("vectorcode.subcommands.clean.get_collections", new=mock_get_collections), 28 | patch("os.path.isdir", return_value=lambda x: x == "/test/path2"), 29 | ): 30 | await run_clean_on_client(mock_client, pipe_mode=False) 31 | 32 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 33 | 34 | 35 | @pytest.mark.asyncio 36 | async def test_run_clean_on_client_pipe_mode(): 37 | mock_client = AsyncMock(spec=AsyncClientAPI) 38 | mock_collection1 = AsyncMock() 39 | mock_collection1.name = "test_collection_1" 40 | mock_collection1.metadata = {"path": "/test/path1"} 41 | mock_collection1.count.return_value = 0 # Empty collection 42 | 43 | async def mock_get_collections(client): 44 | yield mock_collection1 45 | 46 | with patch( 47 | "vectorcode.subcommands.clean.get_collections", new=mock_get_collections 48 | ): 49 | await run_clean_on_client(mock_client, pipe_mode=True) 50 | 51 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 52 | 53 | 54 | @pytest.mark.asyncio 55 | async def test_run_clean_on_removed_dir(): 56 | mock_client = AsyncMock(spec=AsyncClientAPI) 57 | mock_collection1 = AsyncMock() 58 | mock_collection1.name = "test_collection_1" 59 | mock_collection1.metadata = {"path": "/test/path1"} 60 | mock_collection1.count.return_value = 10 61 | 62 | async def mock_get_collections(client): 63 | yield mock_collection1 64 | 65 | with ( 66 | patch("vectorcode.subcommands.clean.get_collections", new=mock_get_collections), 67 | patch("os.path.isdir", return_value=False), 68 | ): 69 | await run_clean_on_client(mock_client, pipe_mode=True) 70 | 71 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 72 | 73 | 74 | @pytest.mark.asyncio 75 | async def test_clean(): 76 | AsyncMock(spec=AsyncClientAPI) 77 | mock_config = Config(pipe=False) 78 | 79 | with patch("vectorcode.subcommands.clean.ClientManager"): 80 | result = await clean(mock_config) 81 | 82 | assert result == 0 83 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/ls_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local vc_config = require("vectorcode.config") 4 | local utils = require("vectorcode.utils") 5 | local logger = vc_config.logger 6 | 7 | ---@type VectorCode.CodeCompanion.LsToolOpts 8 | local default_ls_options = { 9 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 10 | } 11 | 12 | ---@param opts VectorCode.CodeCompanion.LsToolOpts|{}|nil 13 | ---@return VectorCode.CodeCompanion.LsToolOpts 14 | local get_ls_tool_opts = function(opts) 15 | opts = vim.tbl_deep_extend("force", default_ls_options, opts or {}) 16 | logger.info( 17 | string.format( 18 | "Loading `vectorcode_ls` with the following opts:\n%s", 19 | vim.inspect(opts) 20 | ) 21 | ) 22 | return opts 23 | end 24 | 25 | ---@param opts VectorCode.CodeCompanion.LsToolOpts 26 | ---@return CodeCompanion.Tools.Tool 27 | return function(opts) 28 | opts = get_ls_tool_opts(opts) 29 | local job_runner = 30 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 31 | opts.use_lsp 32 | ) 33 | local tool_name = "vectorcode_ls" 34 | ---@type CodeCompanion.Tools.Tool|{} 35 | return { 36 | name = tool_name, 37 | cmds = { 38 | ---@param tools CodeCompanion.Tools 39 | ---@return nil|{ status: string, data: string } 40 | function(tools, _, _, cb) 41 | job_runner.run_async({ "ls", "--pipe" }, function(result, 
error) 42 | if vim.islist(result) and #result > 0 then 43 | cb({ status = "success", data = result }) 44 | else 45 | if type(error) == "table" then 46 | error = utils.flatten_table_to_string(error, "Unknown error.") 47 | end 48 | cb({ 49 | status = "error", 50 | data = error, 51 | }) 52 | end 53 | end, tools.chat.bufnr) 54 | end, 55 | }, 56 | schema = { 57 | type = "function", 58 | ["function"] = { 59 | name = tool_name, 60 | description = [[ 61 | Retrieve a list of projects accessible via the VectorCode tools. 62 | Where relevant, use paths from this tool as the `project_root` parameter in other vectorcode tools. 63 | ]], 64 | parameters = { 65 | -- make anthropic models happy. 66 | type = "object", 67 | properties = vim.empty_dict(), 68 | required = {}, 69 | additionalProperties = false, 70 | }, 71 | }, 72 | }, 73 | output = { 74 | ---@param tools CodeCompanion.Tools 75 | ---@param stdout VectorCode.LsResult[][] 76 | success = function(_, tools, _, stdout) 77 | stdout = stdout[#stdout] 78 | local user_message 79 | for i, col in ipairs(stdout) do 80 | if i == 1 then 81 | user_message = 82 | string.format("**VectorCode `ls` Tool**: Found %d collections.", #stdout) 83 | else 84 | user_message = "" 85 | end 86 | tools.chat:add_tool_output( 87 | tools.tool, 88 | string.format("%s", col["project-root"]), 89 | user_message 90 | ) 91 | end 92 | end, 93 | }, 94 | } 95 | end 96 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/base.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import logging 3 | from abc import ABC, abstractmethod 4 | from typing import Any 5 | 6 | import numpy 7 | 8 | from vectorcode.chunking import Chunk 9 | from vectorcode.cli_utils import Config, QueryInclude 10 | from vectorcode.subcommands.query.types import QueryResult 11 | 12 | logger = logging.getLogger(name=__name__) 13 | 14 | 15 | class RerankerBase(ABC): 16 | """This is the base class for the rerankers. 17 | You should use the configs.reranker_params field to store and pass the parameters used for your reranker. 18 | You should implement the `compute_similarity` method, which will be called by `rerank` to compute 19 | similarity scores between search query and results. 20 | The items in the returned list should be sorted such that the relevance decreases along the list. 21 | 22 | The class doc string will be added to the error message if your reranker fails to initialise. 23 | Thus, this is a good place to put the instructions to configuring your reranker. 24 | """ 25 | 26 | def __init__(self, configs: Config, **kwargs: Any): 27 | self.configs = configs 28 | assert self.configs.query is not None, ( 29 | "'configs' should contain the query messages." 30 | ) 31 | self.n_result = configs.n_result 32 | self._raw_results: list[QueryResult] = [] 33 | 34 | @classmethod 35 | def create(cls, configs: Config, **kwargs: Any): 36 | try: 37 | return cls(configs, **kwargs) 38 | except Exception as e: 39 | e.add_note( 40 | "\n" 41 | + ( 42 | cls.__doc__ 43 | or f"There was an issue initialising {cls}. Please doublecheck your configuration." 44 | ) 45 | ) 46 | raise 47 | 48 | @abstractmethod 49 | async def compute_similarity( 50 | self, results: list[QueryResult] 51 | ) -> None: # pragma: nocover 52 | """ 53 | Modify the `QueryResult.scores` field **IN-PLACE** so that they contain the correct scores. 
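A minimal sketch of a subclass implementation (illustrative only; `difflib` below is just a stand-in for a real scoring model, not what the bundled rerankers use):

    async def compute_similarity(self, results: list[QueryResult]) -> None:
        import difflib

        for res in results:
            # one score per query string, in the same order as `res.query`
            res.scores = tuple(
                difflib.SequenceMatcher(None, q, res.chunk.text).ratio()
                for q in res.query
            )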
54 | """ 55 | raise NotImplementedError 56 | 57 | async def rerank(self, results: list[QueryResult]) -> list[str | Chunk]: 58 | if len(results) == 0: 59 | return [] 60 | 61 | # compute the similarity scores 62 | await self.compute_similarity(results) 63 | 64 | # group the results by the query type: file (path) or chunk 65 | # and only keep the `top_k` results for each group 66 | group_by = "path" 67 | if QueryInclude.chunk in self.configs.include: 68 | group_by = "chunk" 69 | grouped_results = QueryResult.group(*results, by=group_by, top_k="auto") 70 | 71 | # compute the mean scores for each of the groups 72 | scores: dict[Chunk | str, float] = {} 73 | for key in grouped_results.keys(): 74 | scores[key] = float( 75 | numpy.mean(tuple(i.mean_score() for i in grouped_results[key])) 76 | ) 77 | 78 | return list( 79 | i 80 | for i in heapq.nlargest( 81 | self.configs.n_result, grouped_results.keys(), key=lambda x: scores[x] 82 | ) 83 | ) 84 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/types.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from collections import defaultdict 3 | from dataclasses import dataclass 4 | from typing import Literal, Union 5 | 6 | import numpy 7 | 8 | from vectorcode.chunking import Chunk 9 | 10 | 11 | @dataclass 12 | class QueryResult: 13 | """ 14 | The container for one single query result. 15 | 16 | args: 17 | - path: path to the file 18 | - content: `vectorcode.chunking.Chunk` object that stores the chunk 19 | - query: query messages used for the search 20 | - scores: similarity scores for the corresponding query. 21 | """ 22 | 23 | path: str 24 | chunk: Chunk 25 | query: tuple[str, ...] 26 | scores: tuple[float, ...] 27 | 28 | @classmethod 29 | def merge(cls, *results: "QueryResult") -> "QueryResult": 30 | """ 31 | Given the results of a single chunk/document from different queries, merge them into a single `QueryResult` object. 32 | """ 33 | for i in range(len(results) - 1): 34 | if (i < len(results) - 1) and not results[i].is_same_doc(results[i + 1]): 35 | raise ValueError( 36 | f"The inputs are not the same chunk: {results[i]}, {results[i + 1]}" 37 | ) 38 | 39 | return QueryResult( 40 | path=results[0].path, 41 | chunk=results[0].chunk, 42 | query=sum((tuple(i.query) for i in results), start=tuple()), 43 | scores=sum((tuple(i.scores) for i in results), start=tuple()), 44 | ) 45 | 46 | @staticmethod 47 | def group( 48 | *results: "QueryResult", 49 | by: Union[Literal["path"], Literal["chunk"]] = "path", 50 | top_k: int | Literal["auto"] | None = None, 51 | ) -> dict[Chunk | str, list["QueryResult"]]: 52 | """ 53 | Group the query results based on `key`. 54 | 55 | args: 56 | - `by`: either "path" or "chunk" 57 | - `top_k`: if set, only return the top k results for each group based on mean scores. If "auto", top k is decided by the mean number of results per group. 58 | 59 | returns: 60 | - a dictionary that maps either path or chunk to a list of `QueryResult` object. 
61 | 62 | """ 63 | assert by in {"path", "chunk"} 64 | grouped_result: dict[Chunk | str, list["QueryResult"]] = defaultdict(list) 65 | 66 | for res in results: 67 | grouped_result[getattr(res, by)].append(res) 68 | 69 | if top_k == "auto": 70 | top_k = int(numpy.mean(tuple(len(i) for i in grouped_result.values()))) 71 | 72 | if top_k and top_k > 0: 73 | for group in grouped_result.keys(): 74 | grouped_result[group] = heapq.nlargest(top_k, grouped_result[group]) 75 | return grouped_result 76 | 77 | def mean_score(self): 78 | return float(numpy.mean(self.scores)) 79 | 80 | def __lt__(self, other: "QueryResult"): 81 | assert isinstance(other, QueryResult) 82 | return self.mean_score() < other.mean_score() 83 | 84 | def __gt__(self, other: "QueryResult"): 85 | assert isinstance(other, QueryResult) 86 | return self.mean_score() > other.mean_score() 87 | 88 | def __eq__(self, other: object, /) -> bool: 89 | return ( 90 | isinstance(other, QueryResult) and self.mean_score() == other.mean_score() 91 | ) 92 | 93 | def is_same_doc(self, other: "QueryResult") -> bool: 94 | return self.path == other.path and self.chunk == other.chunk 95 | -------------------------------------------------------------------------------- /tests/subcommands/test_chunks.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import MagicMock, patch 3 | 4 | import pytest 5 | from tree_sitter import Point 6 | 7 | from vectorcode.chunking import Chunk, TreeSitterChunker 8 | from vectorcode.cli_utils import Config 9 | from vectorcode.subcommands import chunks 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_chunks(): 14 | # Mock the Config object 15 | mock_config = MagicMock(spec=Config) 16 | mock_config.chunk_size = 2000 17 | mock_config.overlap_ratio = 0.2 18 | mock_config.files = ["file1.py", "file2.py"] 19 | 20 | # Mock the TreeSitterChunker 21 | mock_chunker = TreeSitterChunker(mock_config) 22 | mock_chunker.chunk = MagicMock() 23 | mock_chunker.chunk.side_effect = [ 24 | [Chunk("chunk1_file1", None, None), Chunk("chunk2_file1", None, None)], 25 | [ 26 | Chunk("chunk1_file2", Point(1, 0), Point(1, 11)), 27 | Chunk("chunk2_file2", Point(1, 0), Point(1, 11)), 28 | ], 29 | ] 30 | with patch( 31 | "vectorcode.subcommands.chunks.TreeSitterChunker", return_value=mock_chunker 32 | ): 33 | # Call the chunks function 34 | result = await chunks(mock_config) 35 | 36 | # Assertions 37 | assert result == 0 38 | assert mock_chunker.config == mock_config 39 | mock_chunker.chunk.assert_called() 40 | assert mock_chunker.chunk.call_count == 2 41 | 42 | 43 | @pytest.mark.asyncio 44 | async def test_chunks_pipe(capsys): 45 | # Mock the Config object 46 | mock_config = MagicMock(spec=Config) 47 | mock_config.chunk_size = 2000 48 | mock_config.overlap_ratio = 0.2 49 | mock_config.files = ["file1.py"] 50 | mock_config.pipe = True 51 | 52 | # Mock the TreeSitterChunker 53 | mock_chunker = TreeSitterChunker(mock_config) 54 | mock_chunker.chunk = MagicMock() 55 | _chunks = [ 56 | Chunk("chunk1_file1", Point(0, 1), Point(0, 12), path="file1.py", id="c1"), 57 | Chunk("chunk2_file1", Point(1, 1), Point(1, 12), path="file1.py", id="c2"), 58 | ] 59 | mock_chunker.chunk.side_effect = [ 60 | _chunks, 61 | ] 62 | with patch( 63 | "vectorcode.subcommands.chunks.TreeSitterChunker", return_value=mock_chunker 64 | ): 65 | # Call the chunks function 66 | result = await chunks(mock_config) 67 | 68 | # Assertions 69 | assert result == 0 70 | assert mock_chunker.config == mock_config 71 | 
mock_chunker.chunk.assert_called() 72 | assert mock_chunker.chunk.call_count == 1 73 | 74 | captured = capsys.readouterr() 75 | output = json.loads(captured.out.strip()) 76 | assert output == [ 77 | [ 78 | { 79 | "text": "chunk1_file1", 80 | "start": { 81 | "row": 0, 82 | "column": 1, 83 | }, 84 | "end": {"row": 0, "column": 12}, 85 | "path": "file1.py", 86 | "chunk_id": "c1", 87 | }, 88 | { 89 | "text": "chunk2_file1", 90 | "start": { 91 | "row": 1, 92 | "column": 1, 93 | }, 94 | "end": {"row": 1, "column": 12}, 95 | "path": "file1.py", 96 | "chunk_id": "c2", 97 | }, 98 | ] 99 | ] 100 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/update.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | from asyncio import Lock 6 | 7 | import tqdm 8 | from chromadb.api.types import IncludeEnum 9 | from chromadb.errors import InvalidCollectionException 10 | 11 | from vectorcode.cli_utils import Config 12 | from vectorcode.common import ClientManager, get_collection, verify_ef 13 | from vectorcode.subcommands.vectorise import VectoriseStats, chunked_add, show_stats 14 | 15 | logger = logging.getLogger(name=__name__) 16 | 17 | 18 | async def update(configs: Config) -> int: 19 | async with ClientManager().get_client(configs) as client: 20 | try: 21 | collection = await get_collection(client, configs, False) 22 | except IndexError as e: 23 | print( 24 | f"{e.__class__.__name__}: Failed to get/create the collection. Please check your config." 25 | ) 26 | return 1 27 | except (ValueError, InvalidCollectionException) as e: 28 | print( 29 | f"{e.__class__.__name__}: There's no existing collection for {configs.project_root}", 30 | file=sys.stderr, 31 | ) 32 | return 1 33 | if collection is None: # pragma: nocover 34 | logger.error( 35 | f"Failed to find a collection at {configs.project_root} from {configs.db_url}" 36 | ) 37 | return 1 38 | if not verify_ef(collection, configs): # pragma: nocover 39 | return 1 40 | 41 | metas = (await collection.get(include=[IncludeEnum.metadatas]))["metadatas"] 42 | if metas is None or len(metas) == 0: # pragma: nocover 43 | logger.debug("Empty collection.") 44 | return 0 45 | 46 | files_gen = (str(meta.get("path", "")) for meta in metas) 47 | files = set() 48 | orphanes = set() 49 | for file in files_gen: 50 | if os.path.isfile(file): 51 | files.add(file) 52 | else: 53 | orphanes.add(file) 54 | 55 | stats = VectoriseStats(removed=len(orphanes)) 56 | collection_lock = Lock() 57 | stats_lock = Lock() 58 | max_batch_size = await client.get_max_batch_size() 59 | semaphore = asyncio.Semaphore(os.cpu_count() or 1) 60 | 61 | with tqdm.tqdm( 62 | total=len(files), desc="Vectorising files...", disable=configs.pipe 63 | ) as bar: 64 | logger.info(f"Updating embeddings for {len(files)} file(s).") 65 | try: 66 | tasks = [ 67 | asyncio.create_task( 68 | chunked_add( 69 | str(file), 70 | collection, 71 | collection_lock, 72 | stats, 73 | stats_lock, 74 | configs, 75 | max_batch_size, 76 | semaphore, 77 | ) 78 | ) 79 | for file in files 80 | ] 81 | for task in asyncio.as_completed(tasks): 82 | await task 83 | bar.update(1) 84 | except asyncio.CancelledError: # pragma: nocover 85 | print("Abort.", file=sys.stderr) 86 | return 1 87 | 88 | if len(orphanes): 89 | logger.info(f"Removing {len(orphanes)} orphaned files from database.") 90 | await collection.delete(where={"path": {"$in": list(orphanes)}}) 91 | 92 | show_stats(configs, 
stats) 93 | return 0 94 | -------------------------------------------------------------------------------- /tests/subcommands/files/test_files_ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import AsyncMock, patch 3 | 4 | import pytest 5 | from chromadb.api.models.AsyncCollection import AsyncCollection 6 | 7 | from vectorcode.cli_utils import CliAction, Config, FilesAction 8 | from vectorcode.subcommands.files.ls import ls 9 | 10 | 11 | @pytest.fixture 12 | def client(): 13 | return AsyncMock() 14 | 15 | 16 | @pytest.fixture 17 | def collection(): 18 | col = AsyncMock(spec=AsyncCollection) 19 | col.get.return_value = { 20 | "ids": ["id1", "id2", "id3"], 21 | "distances": [0.1, 0.2, 0.3], 22 | "metadatas": [ 23 | {"path": "file1.py", "start": 1, "end": 1}, 24 | {"path": "file2.py", "start": 1, "end": 1}, 25 | {"path": "file3.py", "start": 1, "end": 1}, 26 | ], 27 | "documents": [ 28 | "content1", 29 | "content2", 30 | "content3", 31 | ], 32 | } 33 | return col 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_ls(client, collection, capsys): 38 | with ( 39 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 40 | patch( 41 | "vectorcode.subcommands.files.ls.get_collection", return_value=collection 42 | ), 43 | patch("vectorcode.common.try_server", return_value=True), 44 | ): 45 | MockClientManager.return_value._create_client.return_value = client 46 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls)) 47 | out = capsys.readouterr().out 48 | assert "file1.py" in out 49 | assert "file2.py" in out 50 | assert "file3.py" in out 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_ls_piped(client, collection, capsys): 55 | with ( 56 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 57 | patch( 58 | "vectorcode.subcommands.files.ls.get_collection", return_value=collection 59 | ), 60 | patch("vectorcode.common.try_server", return_value=True), 61 | ): 62 | MockClientManager.return_value._create_client.return_value = client 63 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True)) 64 | out = capsys.readouterr().out 65 | assert json.dumps(["file1.py", "file2.py", "file3.py"]).strip() == out.strip() 66 | 67 | 68 | @pytest.mark.asyncio 69 | async def test_ls_no_collection(client, collection, capsys): 70 | with ( 71 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 72 | patch("vectorcode.subcommands.files.ls.get_collection", side_effect=ValueError), 73 | ): 74 | MockClientManager.return_value._create_client.return_value = client 75 | assert ( 76 | await ls( 77 | Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True) 78 | ) 79 | != 0 80 | ) 81 | 82 | 83 | @pytest.mark.asyncio 84 | async def test_ls_empty_collection(client, capsys): 85 | mock_collection = AsyncMock(spec=AsyncCollection) 86 | mock_collection.get.return_value = {} 87 | with ( 88 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 89 | patch( 90 | "vectorcode.subcommands.files.ls.get_collection", 91 | return_value=mock_collection, 92 | ), 93 | patch("vectorcode.common.try_server", return_value=True), 94 | ): 95 | MockClientManager.return_value._create_client.return_value = client 96 | assert ( 97 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls)) == 0 98 | ) 99 | -------------------------------------------------------------------------------- 
/tests/subcommands/files/test_files_rm.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api.models.AsyncCollection import AsyncCollection 5 | 6 | from vectorcode.cli_utils import CliAction, Config, FilesAction 7 | from vectorcode.subcommands.files.rm import rm 8 | 9 | 10 | @pytest.fixture 11 | def client(): 12 | return AsyncMock() 13 | 14 | 15 | @pytest.fixture 16 | def collection(): 17 | col = AsyncMock(spec=AsyncCollection) 18 | col.get.return_value = { 19 | "ids": ["id1", "id2", "id3"], 20 | "distances": [0.1, 0.2, 0.3], 21 | "metadatas": [ 22 | {"path": "file1.py", "start": 1, "end": 1}, 23 | {"path": "file2.py", "start": 1, "end": 1}, 24 | {"path": "file3.py", "start": 1, "end": 1}, 25 | ], 26 | "documents": [ 27 | "content1", 28 | "content2", 29 | "content3", 30 | ], 31 | } 32 | col.name = "test_collection" 33 | return col 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_rm(client, collection, capsys): 38 | with ( 39 | patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, 40 | patch( 41 | "vectorcode.subcommands.files.rm.get_collection", return_value=collection 42 | ), 43 | patch("vectorcode.common.try_server", return_value=True), 44 | patch("os.path.isfile", return_value=True), 45 | patch( 46 | "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x 47 | ), 48 | ): 49 | MockClientManager.return_value._create_client.return_value = client 50 | config = Config( 51 | action=CliAction.files, 52 | files_action=FilesAction.rm, 53 | rm_paths=["file1.py"], 54 | ) 55 | await rm(config) 56 | collection.delete.assert_called_with(where={"path": {"$in": ["file1.py"]}}) 57 | 58 | 59 | @pytest.mark.asyncio 60 | async def test_rm_empty_collection(client, collection, capsys): 61 | with ( 62 | patch( 63 | "vectorcode.subcommands.files.rm.get_collection", return_value=collection 64 | ), 65 | patch("vectorcode.common.try_server", return_value=True), 66 | patch("os.path.isfile", return_value=True), 67 | patch( 68 | "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x 69 | ), 70 | patch( 71 | "vectorcode.subcommands.files.rm.ClientManager._create_client", 72 | return_value=client, 73 | ), 74 | ): 75 | config = Config( 76 | action=CliAction.files, 77 | files_action=FilesAction.rm, 78 | rm_paths=["file1.py"], 79 | ) 80 | collection.count = AsyncMock(return_value=0) 81 | client.delete_collection = AsyncMock() 82 | await rm(config) 83 | client.delete_collection.assert_called_once_with(collection.name) 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_rm_no_collection(client, collection, capsys): 88 | with ( 89 | patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, 90 | patch("vectorcode.subcommands.files.rm.get_collection", side_effect=ValueError), 91 | ): 92 | MockClientManager.return_value._create_client.return_value = client 93 | assert ( 94 | await rm( 95 | Config( 96 | action=CliAction.files, 97 | files_action=FilesAction.rm, 98 | pipe=True, 99 | rm_paths=["file1.py"], 100 | ) 101 | ) 102 | != 0 103 | ) 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | 
dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm-project.org/#use-with-ide 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | # VectorCode 165 | src/vectorcode/_version.py 166 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/files_ls_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | 7 | local default_opts = { 8 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 9 | } 10 | 11 | ---@param opts VectorCode.CodeCompanion.FilesLsToolOpts 12 | ---@return CodeCompanion.Tools.Tool 13 | return function(opts) 14 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) 15 | local job_runner = 16 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 17 | opts.use_lsp 18 | ) 19 | local tool_name = "vectorcode_files_ls" 20 | ---@type CodeCompanion.Tools.Tool|{} 21 | return { 22 | name = tool_name, 23 | cmds = { 24 | ---@param tools CodeCompanion.Tools 25 | ---@param action {project_root: string} 26 | ---@return nil|{ status: string, data: string } 27 | function(tools, action, _, cb) 28 | local args = { "files", "ls", "--pipe" } 29 | action = utils.fix_nil(action) 30 | if action ~= nil then 31 | action.project_root = action.project_root 32 | or vim.fs.root(0, { ".vectorcode", ".git" }) 33 | if action.project_root ~= nil then 34 | action.project_root = vim.fs.normalize(action.project_root) 35 | if utils.is_directory(action.project_root) then 36 | vim.list_extend(args, { "--project_root", action.project_root }) 37 | end 38 | end 39 | end 40 | job_runner.run_async(args, function(result, error) 41 | if vim.islist(result) and #result > 0 then 42 | cb({ status = "success", data = result }) 43 | else 44 | if type(error) == "table" then 45 | error = utils.flatten_table_to_string(error, "Unknown error.") 46 | end 47 | cb({ 48 | status = "error", 49 | data = error, 50 | }) 51 | end 52 | end, tools.chat.bufnr) 53 | end, 54 | }, 55 | schema = { 56 | type = "function", 57 | ["function"] = { 58 | name = tool_name, 59 | description = [[ 60 | Retrieve a list of files that have been added to the database for a given project. 
61 | **ABSOLUTE PATHS** in the results indicate that the files are OUTSIDE of the current working directories and you can **ONLY** access them via the VectorCode tools. 62 | **RELATIVE PATHS** in the results indicate that the files are INSIDE the current project. You can use VectorCode tools or any other tools that the user provided to interact with them. They are relative to the project root. 63 | ]], 64 | parameters = { 65 | type = "object", 66 | properties = { 67 | project_root = { 68 | type = "string", 69 | description = [[ 70 | The project that the files belong to. 71 | The value should be one of the following: 72 | - One of the paths from the `vectorcode_ls` tool; 73 | - User input; 74 | - `null` (omit this parameter), which means the current project, if found. 75 | ]], 76 | }, 77 | }, 78 | }, 79 | }, 80 | }, 81 | output = { 82 | ---@param tools CodeCompanion.Tools 83 | ---@param stdout string[][] 84 | success = function(_, tools, _, stdout) 85 | stdout = stdout[#stdout] 86 | local user_message 87 | for i, col in ipairs(stdout) do 88 | if i == 1 then 89 | user_message = 90 | string.format("**VectorCode `files_ls` Tool**: Found %d files.", #stdout) 91 | else 92 | user_message = "" 93 | end 94 | tools.chat:add_tool_output( 95 | tools.tool, 96 | string.format("%s", cc_common.cleanup_path(col)), 97 | user_message 98 | ) 99 | end 100 | end, 101 | }, 102 | } 103 | end 104 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/files_rm_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | 7 | local default_opts = { 8 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 9 | } 10 | 11 | ---@alias FilesRmArgs { paths: string[], project_root: string? } 12 | 13 | ---@param opts VectorCode.CodeCompanion.FilesRmToolOpts 14 | ---@return CodeCompanion.Tools 15 | return function(opts) 16 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) 17 | 18 | local tool_name = "vectorcode_files_rm" 19 | local job_runner = cc_common.initialise_runner(opts.use_lsp) 20 | 21 | ---@type CodeCompanion.Tools|{} 22 | return { 23 | name = tool_name, 24 | schema = { 25 | type = "function", 26 | ["function"] = { 27 | name = tool_name, 28 | description = "Remove files from the VectorCode database. The files will remain in the file system.", 29 | parameters = { 30 | type = "object", 31 | properties = { 32 | paths = { 33 | type = "array", 34 | items = { type = "string" }, 35 | description = "Paths to the files to be removed from the database.", 36 | }, 37 | project_root = { 38 | type = "string", 39 | description = [[ 40 | The project that the files belong to. 41 | The value should be one of the following: 42 | - One of the paths from the `vectorcode_ls` tool; 43 | - User input; 44 | - `null` (omit this parameter), which means the current project, if found. 
45 | ]], 46 | }, 47 | }, 48 | required = { "paths" }, 49 | }, 50 | strict = true, 51 | }, 52 | }, 53 | cmds = { 54 | ---@param tools CodeCompanion.Tools 55 | ---@param action VectoriseToolArgs 56 | ---@return nil|{ status: string, data: string } 57 | function(tools, action, _, cb) 58 | local args = { "files", "rm", "--pipe" } 59 | action = utils.fix_nil(action) 60 | if action.project_root then 61 | local project_root = vim.fs.abspath(vim.fs.normalize(action.project_root)) 62 | if utils.is_directory(project_root) then 63 | vim.list_extend(args, { "--project_root", project_root }) 64 | else 65 | return { status = "error", data = "Invalid path " .. project_root } 66 | end 67 | end 68 | if action.paths == nil or #action.paths == 0 then 69 | return { status = "error", data = "Please specify at least one path." } 70 | end 71 | vim.list_extend( 72 | args, 73 | vim 74 | .iter(action.paths) 75 | :filter( 76 | ---@param item string 77 | function(item) 78 | return utils.is_file(item) 79 | end 80 | ) 81 | :totable() 82 | ) 83 | job_runner.run_async( 84 | args, 85 | ---@param result VectoriseResult 86 | function(result, error, code, _) 87 | if code == 0 then 88 | cb({ status = "success", data = result }) 89 | else 90 | cb({ status = "error", data = { error = error, code = code } }) 91 | end 92 | end, 93 | tools.chat.bufnr 94 | ) 95 | end, 96 | }, 97 | output = { 98 | ---@param self CodeCompanion.Tools.Tool 99 | prompt = function(self, _) 100 | return string.format( 101 | "Remove %d files from VectorCode database?", 102 | #self.args.paths 103 | ) 104 | end, 105 | ---@param self CodeCompanion.Tools.Tool 106 | ---@param tools CodeCompanion.Tools 107 | success = function(self, tools, _, _) 108 | tools.chat:add_tool_output(self, "**VectorCode `files_rm` tool**: successful.") 109 | end, 110 | }, 111 | } 112 | end 113 | -------------------------------------------------------------------------------- /.github/workflows/panvimdoc.yml: -------------------------------------------------------------------------------- 1 | name: panvimdoc 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'main' 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | docs: 13 | runs-on: ubuntu-latest 14 | name: pandoc to vimdoc 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: panvimdoc 19 | uses: kdheepak/panvimdoc@main 20 | with: 21 | vimdoc: "VectorCode" # Output vimdoc project name (required) 22 | pandoc: "./docs/neovim/README.md" # Input pandoc file 23 | toc: true # Table of contents 24 | description: "A code repository indexing tool to supercharge your LLM experience." 
# Project description used in title (if empty, uses neovim version and current date) 25 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 26 | demojify: true # Strip emojis from the vimdoc 27 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 28 | treesitter: true # Use treesitter for highlighting codeblocks 29 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 30 | docmapping: false # Use h4 headers as mapping docs 31 | docmappingprojectname: true # Use project name in tag when writing mapping docs 32 | shiftheadinglevelby: 0 # Shift heading levels by specified number 33 | incrementheadinglevelby: 0 # Increment heading levels by specified number 34 | 35 | - name: panvimdoc 36 | uses: kdheepak/panvimdoc@main 37 | with: 38 | vimdoc: "VectorCode-API" # Output vimdoc project name (required) 39 | pandoc: "./docs/neovim/api_references.md" # Input pandoc file 40 | toc: true # Table of contents 41 | description: "A code repository indexing tool to supercharge your LLM experience." # Project description used in title (if empty, uses neovim version and current date) 42 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 43 | demojify: true # Strip emojis from the vimdoc 44 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 45 | treesitter: true # Use treesitter for highlighting codeblocks 46 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 47 | docmapping: false # Use h4 headers as mapping docs 48 | docmappingprojectname: true # Use project name in tag when writing mapping docs 49 | shiftheadinglevelby: 0 # Shift heading levels by specified number 50 | incrementheadinglevelby: 0 # Increment heading levels by specified number 51 | 52 | - name: panvimdoc 53 | uses: kdheepak/panvimdoc@main 54 | with: 55 | vimdoc: "VectorCode-cli" # Output vimdoc project name (required) 56 | pandoc: "./docs/cli.md" # Input pandoc file 57 | toc: true # Table of contents 58 | description: "A code repository indexing tool to supercharge your LLM experience." # Project description used in title (if empty, uses neovim version and current date) 59 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 60 | demojify: true # Strip emojis from the vimdoc 61 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 62 | treesitter: true # Use treesitter for highlighting codeblocks 63 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 64 | docmapping: false # Use h4 headers as mapping docs 65 | docmappingprojectname: true # Use project name in tag when writing mapping docs 66 | shiftheadinglevelby: 0 # Shift heading levels by specified number 67 | incrementheadinglevelby: 0 # Increment heading levels by specified number 68 | 69 | - uses: stefanzweifel/git-auto-commit-action@v6.0.1 70 | with: 71 | commit_message: "Auto generate docs" 72 | branch: ${{ github.head_ref }} 73 | file_pattern: 'doc/*.txt' 74 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/copilotchat.lua: -------------------------------------------------------------------------------- 1 | ---@module "CopilotChat" 2 | 3 | ---@class VectorCode.CopilotChat.ContextOpts 4 | ---@field max_num number? 5 | ---@field use_lsp boolean? 
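--- A minimal wiring sketch (illustrative only): `make_context_provider`, `max_num` and `use_lsp` come from this file, while the `contexts` table and the `vectorcode` key are assumptions about a typical CopilotChat.nvim `setup()` call; adapt them to your own config.
---
---   require("CopilotChat").setup({
---     contexts = {
---       vectorcode = require("vectorcode.integrations").copilotchat.make_context_provider({
---         max_num = 10,
---         use_lsp = false,
---       }),
---     },
---   })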
6 | 7 | local async = require("plenary.async") 8 | local vc_config = require("vectorcode.config") 9 | local logger = vc_config.logger 10 | local notify_opts = vc_config.notify_opts 11 | local check_cli_wrap = vc_config.check_cli_wrap 12 | local job_runner = nil 13 | 14 | ---@param use_lsp boolean 15 | local function get_runner(use_lsp) 16 | if job_runner == nil then 17 | if use_lsp then 18 | job_runner = require("vectorcode.jobrunner.lsp") 19 | end 20 | if job_runner == nil then 21 | job_runner = require("vectorcode.jobrunner.cmd") 22 | if use_lsp then 23 | vim.schedule_wrap(vim.notify)( 24 | "Failed to initialise the LSP runner. Falling back to cmd runner.", 25 | vim.log.levels.WARN, 26 | notify_opts 27 | ) 28 | end 29 | end 30 | end 31 | return job_runner 32 | end 33 | 34 | ---@param args string[] 35 | ---@param use_lsp boolean 36 | ---@param bufnr integer 37 | ---@async 38 | local run_job = async.wrap(function(args, use_lsp, bufnr, callback) 39 | local runner = get_runner(use_lsp) 40 | assert(runner ~= nil, "Failed to initialize the runner!") 41 | runner.run_async(args, callback, bufnr) 42 | end, 4) 43 | 44 | ---@param opts VectorCode.CopilotChat.ContextOpts? 45 | ---@return CopilotChat.config.context 46 | local make_context_provider = check_cli_wrap(function(opts) 47 | opts = vim.tbl_deep_extend("force", { 48 | max_num = 5, 49 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 50 | }, opts or {}) 51 | logger.info("Creating CopilotChat context provider with the following opts:\n", opts) 52 | 53 | local utils = require("CopilotChat.utils") 54 | 55 | return { 56 | description = [[This gives you the ability to access the repository to find information that you may need to assist the user. Supports input (query). 57 | 58 | - **Use at your discretion** when you feel you don't have enough information about the repository or project. 59 | - **Don't escape** special characters. 60 | - If a class, type or function has been imported from another file, this context may be able to find its source. Add the name of the imported symbol to the query. 61 | - The embeddings are mostly generated from source code, so using keywords that may be present in source code may help with the retrieval. 62 | - Avoid retrieving one single file because the retrieval mechanism may not be very accurate. 63 | = If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings 64 | - Do not use exact query keywords that you have used in a previous context call in the conversation, unless the user instructed otherwise 65 | ]], 66 | 67 | input = function(callback) 68 | vim.ui.input({ 69 | prompt = "Enter query> ", 70 | }, callback) 71 | end, 72 | 73 | resolve = function(input, source, prompt) 74 | if not input or input == "" then 75 | input = prompt 76 | end 77 | 78 | local args = { 79 | "query", 80 | "--pipe", 81 | "-n", 82 | tostring(opts.max_num), 83 | '"' .. input .. 
'"', 84 | } 85 | 86 | local cwd = source.cwd() 87 | local try_root = vim.fs.root(cwd, ".vectorcode") or vim.fs.root(cwd, ".git") 88 | if try_root ~= nil then 89 | vim.list_extend(args, { "--project_root", try_root }) 90 | end 91 | logger.info("CopilotChat ctx provider called with the following args: ", args) 92 | local result, err = run_job(args, opts.use_lsp, source.bufnr) 93 | if utils.empty(result) and err then 94 | error(utils.make_string(err)) 95 | end 96 | 97 | utils.schedule_main() 98 | return vim.tbl_map(function(item) 99 | return { 100 | content = item.document, 101 | filename = item.path, 102 | filetype = utils.filetype(item.path), 103 | } 104 | end, result) 105 | end, 106 | } 107 | end) 108 | 109 | return { 110 | make_context_provider = make_context_provider, 111 | } 112 | -------------------------------------------------------------------------------- /src/vectorcode/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | import traceback 6 | 7 | import httpx 8 | 9 | from vectorcode import __version__ 10 | from vectorcode.cli_utils import ( 11 | CliAction, 12 | config_logging, 13 | find_project_root, 14 | get_project_config, 15 | parse_cli_args, 16 | ) 17 | from vectorcode.common import ClientManager 18 | 19 | logger = logging.getLogger(name=__name__) 20 | 21 | 22 | async def async_main(): 23 | cli_args = await parse_cli_args() 24 | if cli_args.no_stderr: 25 | sys.stderr = open(os.devnull, "w") 26 | 27 | if cli_args.debug: 28 | from vectorcode import debugging 29 | 30 | debugging.enable() 31 | 32 | logger.info("Collected CLI arguments: %s", cli_args) 33 | 34 | if cli_args.project_root is None: 35 | cwd = os.getcwd() 36 | cli_args.project_root = ( 37 | find_project_root(cwd, ".vectorcode") 38 | or find_project_root(cwd, ".git") 39 | or cwd 40 | ) 41 | 42 | logger.info(f"Project root is set to {cli_args.project_root}") 43 | 44 | try: 45 | final_configs = await ( 46 | await get_project_config(cli_args.project_root) 47 | ).merge_from(cli_args) 48 | except IOError as e: 49 | traceback.print_exception(e, file=sys.stderr) 50 | return 1 51 | 52 | logger.info("Final configuration has been built: %s", final_configs) 53 | 54 | match cli_args.action: 55 | case CliAction.check: 56 | from vectorcode.subcommands import check 57 | 58 | return await check(cli_args) 59 | case CliAction.init: 60 | from vectorcode.subcommands import init 61 | 62 | return await init(cli_args) 63 | case CliAction.version: 64 | print(__version__) 65 | return 0 66 | case CliAction.prompts: 67 | from vectorcode.subcommands import prompts 68 | 69 | return prompts(cli_args) 70 | case CliAction.chunks: 71 | from vectorcode.subcommands import chunks 72 | 73 | return await chunks(final_configs) 74 | 75 | if final_configs.pipe: # pragma: nocover 76 | # NOTE: NNCF (intel GPU acceleration for sentence transformer) keeps showing logs. 77 | # This disables logs below ERROR so that it doesn't hurt the `pipe` output. 
78 | logging.disable(logging.ERROR - 1) 79 | 80 | return_val = 0 81 | try: 82 | match final_configs.action: 83 | case CliAction.query: 84 | from vectorcode.subcommands import query 85 | 86 | return_val = await query(final_configs) 87 | case CliAction.vectorise: 88 | from vectorcode.subcommands import vectorise 89 | 90 | return_val = await vectorise(final_configs) 91 | case CliAction.drop: 92 | from vectorcode.subcommands import drop 93 | 94 | return_val = await drop(final_configs) 95 | case CliAction.ls: 96 | from vectorcode.subcommands import ls 97 | 98 | return_val = await ls(final_configs) 99 | case CliAction.update: 100 | from vectorcode.subcommands import update 101 | 102 | return_val = await update(final_configs) 103 | case CliAction.clean: 104 | from vectorcode.subcommands import clean 105 | 106 | return_val = await clean(final_configs) 107 | case CliAction.files: 108 | from vectorcode.subcommands import files 109 | 110 | return_val = await files(final_configs) 111 | except Exception as e: 112 | return_val = 1 113 | if isinstance(e, httpx.RemoteProtocolError): # pragma: nocover 114 | e.add_note( 115 | f"Please verify that {final_configs.db_url} is a working chromadb server." 116 | ) 117 | logger.error(traceback.format_exc()) 118 | finally: 119 | await ClientManager().kill_servers() 120 | return return_val 121 | 122 | 123 | def main(): # pragma: nocover 124 | config_logging("vectorcode") 125 | return asyncio.run(async_main()) 126 | 127 | 128 | if __name__ == "__main__": # pragma: nocover 129 | sys.exit(main()) 130 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/lsp.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | 3 | ---@type VectorCode.JobRunner 4 | local jobrunner = {} 5 | 6 | ---@type vim.lsp.Client 7 | local CLIENT = nil 8 | 9 | local notify_opts = vc_config.notify_opts 10 | local logger = vc_config.logger 11 | 12 | --- Returns the Client ID if applicable, or `nil` if the language server fails to start 13 | ---@param ok_to_fail boolean 14 | ---@return integer? 15 | function jobrunner.init(ok_to_fail) 16 | local existing_clients = vim.lsp.get_clients({ name = vc_config.lsp_configs().name }) 17 | if #existing_clients > 0 then 18 | CLIENT = existing_clients[1] 19 | return CLIENT.id 20 | end 21 | if ok_to_fail == nil then ok_to_fail = true end 22 | local client_id = vim.lsp.start(vc_config.lsp_configs(), {}) 23 | if client_id ~= nil then 24 | -- server started 25 | CLIENT = vim.lsp.get_client_by_id(client_id) --[[@as vim.lsp.Client]] 26 | else 27 | -- failed to start server 28 | if vc_config.get_user_config().notify or not ok_to_fail then 29 | local message = "Failed to start vectorcode-server due to some error."
30 | logger.error(message) 31 | vim.schedule(function() 32 | vim.notify(message, vim.log.levels.ERROR, notify_opts) 33 | end) 34 | end 35 | return nil 36 | end 37 | return client_id 38 | end 39 | 40 | function jobrunner.run(args, timeout_ms, bufnr) 41 | jobrunner.init(false) 42 | assert(CLIENT ~= nil, "Failed to initialize the LSP server!") 43 | assert(bufnr ~= nil, "Need to pass the buffer number!") 44 | if timeout_ms == nil or timeout_ms < 0 then 45 | timeout_ms = 2 ^ 31 - 1 46 | end 47 | args = require("vectorcode.jobrunner").find_root(args, bufnr) 48 | 49 | local result, err, code 50 | jobrunner.run_async(args, function(res, e, e_code) 51 | result = res 52 | err = e 53 | code = e_code 54 | end, bufnr) 55 | vim.wait(timeout_ms, function() 56 | return (result ~= nil) or (err ~= nil) 57 | end) 58 | return result or {}, err, code 59 | end 60 | 61 | function jobrunner.run_async(args, callback, bufnr) 62 | assert(jobrunner.init(false)) 63 | assert(bufnr ~= nil, "Need to pass the buffer number!") 64 | if not CLIENT.attached_buffers[bufnr] then 65 | if vim.lsp.buf_attach_client(bufnr, CLIENT.id) then 66 | local uri = vim.uri_from_bufnr(bufnr) 67 | local text = vim.api.nvim_buf_get_lines(bufnr, 0, -1, true) 68 | vim.schedule_wrap(CLIENT.notify)(vim.lsp.protocol.Methods.textDocument_didOpen, { 69 | textDocument = { 70 | uri = uri, 71 | text = text, 72 | version = 1, 73 | languageId = vim.bo[bufnr].filetype, 74 | }, 75 | }) 76 | else 77 | local message = "Failed to attach lsp client" 78 | vim.schedule(function() 79 | vim.notify(message) 80 | end) 81 | logger.warn(message) 82 | end 83 | end 84 | args = require("vectorcode.jobrunner").find_root(args, bufnr) 85 | logger.debug( 86 | ("lsp jobrunner for buffer %s args: %s"):format(bufnr, vim.inspect(args)) 87 | ) 88 | local _, id = CLIENT:request( 89 | vim.lsp.protocol.Methods.workspace_executeCommand, 90 | -- NOTE: This is not a hardcoded executable, but rather part of our LSP implementation. 
91 | { command = "vectorcode", arguments = args }, 92 | function(err, result, _, _) 93 | if type(callback) == "function" then 94 | local err_message = {} 95 | if err ~= nil and err.message ~= nil then 96 | err_message = { err.message } 97 | end 98 | local code = 0 99 | if err and err.code then 100 | code = err.code 101 | end 102 | vim.schedule_wrap(callback)(result, err_message, code) 103 | if result then 104 | logger.debug("lsp jobrunner result:\n", result) 105 | end 106 | if err then 107 | logger.info("lsp jobrunner error:\n", err) 108 | end 109 | end 110 | end, 111 | bufnr 112 | ) 113 | return id 114 | end 115 | 116 | function jobrunner.is_job_running(job_handler) 117 | jobrunner.init(true) 118 | if CLIENT ~= nil then 119 | local request_data = CLIENT.requests[job_handler] 120 | return request_data ~= nil and request_data.type == "pending" 121 | end 122 | return false 123 | end 124 | 125 | function jobrunner.stop_job(job_handler) 126 | jobrunner.init(true) 127 | if CLIENT ~= nil then 128 | CLIENT:cancel_request(job_handler) 129 | end 130 | end 131 | 132 | return jobrunner 133 | -------------------------------------------------------------------------------- /tests/subcommands/test_update.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api.types import IncludeEnum 5 | from chromadb.errors import InvalidCollectionException 6 | 7 | from vectorcode.cli_utils import Config 8 | from vectorcode.subcommands.update import update 9 | 10 | 11 | @pytest.mark.asyncio 12 | async def test_update_success(): 13 | mock_client = AsyncMock() 14 | mock_collection = AsyncMock() 15 | mock_collection.get.return_value = { 16 | "metadatas": [{"path": "file1.py"}, {"path": "file2.py"}] 17 | } 18 | mock_collection.delete = AsyncMock() 19 | mock_client.get_max_batch_size.return_value = 100 20 | 21 | with ( 22 | patch("vectorcode.subcommands.update.ClientManager"), 23 | patch( 24 | "vectorcode.subcommands.update.get_collection", return_value=mock_collection 25 | ), 26 | patch("vectorcode.subcommands.update.verify_ef", return_value=True), 27 | patch("os.path.isfile", return_value=True), 28 | patch( 29 | "vectorcode.subcommands.update.chunked_add", new_callable=AsyncMock 30 | ) as mock_chunked_add, 31 | patch("vectorcode.subcommands.update.show_stats"), 32 | ): 33 | config = Config(project_root="/test/project", pipe=False) 34 | result = await update(config) 35 | 36 | assert result == 0 37 | mock_collection.get.assert_called_once_with(include=[IncludeEnum.metadatas]) 38 | assert mock_chunked_add.call_count == 2 39 | mock_collection.delete.assert_not_called() 40 | 41 | 42 | @pytest.mark.asyncio 43 | async def test_update_with_orphans(): 44 | mock_client = AsyncMock() 45 | mock_collection = AsyncMock() 46 | mock_collection.get.return_value = { 47 | "metadatas": [{"path": "file1.py"}, {"path": "file2.py"}, {"path": "orphan.py"}] 48 | } 49 | mock_collection.delete = AsyncMock() 50 | mock_client.get_max_batch_size.return_value = 100 51 | 52 | with ( 53 | patch("vectorcode.subcommands.update.ClientManager"), 54 | patch( 55 | "vectorcode.subcommands.update.get_collection", return_value=mock_collection 56 | ), 57 | patch("vectorcode.subcommands.update.verify_ef", return_value=True), 58 | patch("os.path.isfile", side_effect=[True, True, False]), 59 | patch( 60 | "vectorcode.subcommands.update.chunked_add", new_callable=AsyncMock 61 | ) as mock_chunked_add, 62 | 
patch("vectorcode.subcommands.update.show_stats"), 63 | ): 64 | config = Config(project_root="/test/project", pipe=False) 65 | result = await update(config) 66 | 67 | assert result == 0 68 | mock_collection.get.assert_called_once_with(include=[IncludeEnum.metadatas]) 69 | assert mock_chunked_add.call_count == 2 70 | mock_collection.delete.assert_called_once_with( 71 | where={"path": {"$in": ["orphan.py"]}} 72 | ) 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_update_index_error(): 77 | mock_client = AsyncMock() 78 | # mock_collection = AsyncMock() 79 | 80 | with ( 81 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 82 | patch("vectorcode.subcommands.update.get_collection", side_effect=IndexError), 83 | patch("sys.stderr"), 84 | ): 85 | MockClientManager.return_value._create_client.return_value = mock_client 86 | config = Config(project_root="/test/project", pipe=False) 87 | result = await update(config) 88 | 89 | assert result == 1 90 | 91 | 92 | @pytest.mark.asyncio 93 | async def test_update_value_error(): 94 | mock_client = AsyncMock() 95 | # mock_collection = AsyncMock() 96 | 97 | with ( 98 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 99 | patch("vectorcode.subcommands.update.get_collection", side_effect=ValueError), 100 | patch("sys.stderr"), 101 | ): 102 | MockClientManager.return_value._create_client.return_value = mock_client 103 | config = Config(project_root="/test/project", pipe=False) 104 | result = await update(config) 105 | 106 | assert result == 1 107 | 108 | 109 | @pytest.mark.asyncio 110 | async def test_update_invalid_collection_exception(): 111 | mock_client = AsyncMock() 112 | # mock_collection = AsyncMock() 113 | 114 | with ( 115 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 116 | patch( 117 | "vectorcode.subcommands.update.get_collection", 118 | side_effect=InvalidCollectionException, 119 | ), 120 | patch("sys.stderr"), 121 | ): 122 | MockClientManager.return_value._create_client.return_value = mock_client 123 | config = Config(project_root="/test/project", pipe=False) 124 | result = await update(config) 125 | 126 | assert result == 1 127 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/prompts/init.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local vc_config = require("vectorcode.config") 4 | 5 | local utils = require("vectorcode.utils") 6 | 7 | ---@param path string[]|string path to files or wildcards. 8 | ---@param project_root? string 9 | ---@param callback? VectorCode.JobRunner.Callback 10 | function M.vectorise_files(path, project_root, callback) 11 | if type(path) == "string" then 12 | path = { path } 13 | end 14 | assert(not vim.tbl_isempty(path), "`path` cannot be empty") 15 | 16 | local jobrunner = 17 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 18 | vc_config.get_user_config().async_backend == "lsp" 19 | ) 20 | 21 | local args = { "vectorise", "--pipe" } 22 | if project_root then 23 | vim.list_extend(args, { "--project_root", project_root }) 24 | end 25 | vim.list_extend(args, path) 26 | jobrunner.run_async(args, function(result, error, code, signal) 27 | if type(callback) == "function" then 28 | callback(result, error, code, signal) 29 | end 30 | end, 0) 31 | end 32 | 33 | ---@class VectorCode.CodeCompanion.PromptFactory.Opts 34 | ---@field name string? 
human-readable name of this prompt 35 | ---@field project_root string|(fun():string) project_root of the files to be added to the database 36 | ---Paths to the files in the local directory to be added to the database. 37 | --- 38 | ---These should either be absolute paths, or relative to the project root. 39 | ---@field file_patterns string[]|(fun():string[]) 40 | ---See https://codecompanion.olimorris.dev/extending/prompts.html#recipe-2-using-context-in-your-prompts 41 | --- 42 | ---Note: If a system prompt is set here, your default chat system prompt will be ignored. 43 | ---@field system_prompt? string|fun(context:table):string 44 | ---This contains some preliminary messages (filled into the chat buffer) that tells the LLM about the task. 45 | ---If you're overwriting the default message, make sure to include the tool (`@{vectorcode_query}`). 46 | --- 47 | ---See https://codecompanion.olimorris.dev/extending/prompts.html#recipe-2-using-context-in-your-prompts 48 | ---@field user_prompt? string|fun(context:table):string 49 | 50 | ---@param opts VectorCode.CodeCompanion.PromptFactory.Opts 51 | function M.register_prompt(opts) 52 | opts = vim.deepcopy(opts) 53 | 54 | if type(opts.file_patterns) == "function" then 55 | opts.file_patterns = opts.file_patterns() 56 | end 57 | 58 | assert( 59 | ---@diagnostic disable-next-line: param-type-mismatch 60 | type(opts.project_root) == "string" and utils.is_directory(opts.project_root), 61 | string.format("`%s` is not a valid directory.", opts.project_root) 62 | ) 63 | assert( 64 | ---@diagnostic disable-next-line: param-type-mismatch 65 | opts.file_patterns ~= nil and (not vim.tbl_isempty(opts.file_patterns)), 66 | "Recieved empty path specs." 67 | ) 68 | 69 | assert(type(opts.name) == "string", "`name` cannot be `nil`.") 70 | 71 | local constants = require("codecompanion.config").config.constants 72 | local prompts = {} 73 | 74 | if opts.system_prompt then 75 | table.insert( 76 | prompts, 77 | { role = constants.SYSTEM_ROLE, content = opts.system_prompt } 78 | ) 79 | end 80 | table.insert(prompts, #prompts + 1, { 81 | role = constants.USER_ROLE, 82 | content = opts.user_prompt 83 | or string.format( 84 | [[I have some questions about the documents under the `%s` directory. 85 | The files have been added to the database and can be searched by calling the @{vectorcode_query} tool. 86 | When you call the tool, use `%s` as the value for the argument `project_root`. 87 | Use the information returned by the tool to answer my questions, and cite the sources when appropriate. 88 | If you need more information, call the tool with different search keywords or ask for more context and/or tools. 
89 | 90 | Here's my question: 91 | 92 | - ]], 93 | opts.project_root, 94 | opts.project_root 95 | ), 96 | }) 97 | return { 98 | name = opts.name, 99 | strategy = "chat", 100 | opts = { 101 | ignore_system_prompt = opts.system_prompt ~= nil, 102 | pre_hook = function() 103 | if vc_config.get_user_config().notify then 104 | vim.notify( 105 | string.format("Adding files under `%s` to the database.", opts.project_root), 106 | vim.log.levels.INFO, 107 | vc_config.notify_opts 108 | ) 109 | end 110 | M.vectorise_files( 111 | vim 112 | .iter(opts.file_patterns) 113 | :map(function(p) 114 | if vim.fn.isabsolutepath(p) == 1 then 115 | return p 116 | else 117 | return vim.fs.joinpath(opts.project_root, p) 118 | end 119 | end) 120 | :totable(), 121 | opts.project_root, 122 | function(result, err, _, _) 123 | if result ~= nil and not vim.tbl_isempty(result) then 124 | vim.schedule_wrap(vim.notify)( 125 | string.format( 126 | "Vectorised %d new files.", 127 | result.add or 0, 128 | opts.project_root 129 | ), 130 | vim.log.levels.INFO, 131 | vc_config.notify_opts 132 | ) 133 | elseif err ~= nil then 134 | err = utils.flatten_table_to_string(err, "Unknown error.") 135 | vim.schedule_wrap(vim.notify)( 136 | err, 137 | vim.log.levels.WARN, 138 | vc_config.notify_opts 139 | ) 140 | end 141 | end 142 | ) 143 | end, 144 | }, 145 | prompts = prompts, 146 | } 147 | end 148 | return M 149 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/init.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import platform 5 | import re 6 | import shutil 7 | import stat 8 | from pathlib import Path 9 | from typing import Optional 10 | 11 | from vectorcode.cli_utils import GLOBAL_CONFIG_DIR, Config, find_project_root 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | __GLOBAL_HOOKS_PATH = Path(GLOBAL_CONFIG_DIR) / "hooks" 16 | 17 | 18 | # Keys: name of the hooks, ie. `pre-commit` 19 | # Values: lines of the hooks. 20 | __HOOK_CONTENTS: dict[str, list[str]] = { 21 | "pre-commit": [ 22 | "diff_files=$(git diff --cached --name-only)", 23 | 'if [ -d ".vectorcode" ] && [ ! -z "$diff_files" ]; then', 24 | " vectorcode vectorise $diff_files", 25 | "fi", 26 | ], 27 | "post-checkout": [ 28 | 'if [ -z "$(echo $1|grep [^0])" ]; then', 29 | ' files=""', 30 | " ( [ -f .vectorcode/vectorcode.include ] || [ -f ~/.config/vectorcode/vectorcode.include ] ) && vectorcode vectorise || true", 31 | "else", 32 | ' files=$(git diff --name-only "$1" "$2")', 33 | "fi", 34 | 'if [ -d ".vectorcode" ] && [ ! 
-z "$files" ]; then', 35 | " vectorcode vectorise $files", 36 | "fi", 37 | ], 38 | } 39 | 40 | 41 | def __lines_are_empty(lines: list[str]) -> bool: 42 | pattern = re.compile(r"^\s*$") 43 | if len(lines) == 0: 44 | return True 45 | return all(map(lambda line: pattern.match(line) is not None, lines)) 46 | 47 | 48 | def load_hooks(): 49 | global __HOOK_CONTENTS 50 | for file in glob.glob(str(__GLOBAL_HOOKS_PATH / "*")): 51 | hook_name = Path(file).stem 52 | with open(file) as fin: 53 | lines = fin.readlines() 54 | if not __lines_are_empty(lines): 55 | __HOOK_CONTENTS[hook_name] = lines 56 | 57 | 58 | class HookFile: 59 | prefix = "# VECTORCODE_HOOK_START" 60 | suffix = "# VECTORCODE_HOOK_END" 61 | prefix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_START\s*") 62 | suffix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_END\s*") 63 | 64 | def __init__(self, path: str | Path, git_dir: Optional[str | Path] = None): 65 | self.path = path 66 | self.lines: list[str] = [] 67 | if os.path.isfile(self.path): 68 | with open(self.path) as fin: 69 | self.lines.extend(fin.readlines()) 70 | 71 | def has_vectorcode_hooks(self, force: bool = False) -> bool: 72 | for start, start_line in enumerate(self.lines): 73 | if self.prefix_pattern.match(start_line) is None: 74 | continue 75 | 76 | for end in range(start + 1, len(self.lines)): 77 | if self.suffix_pattern.match(self.lines[end]) is not None: 78 | if force: 79 | logger.debug("`force` cleaning existing VectorCode hooks...") 80 | new_lines = self.lines[:start] + self.lines[end + 1 :] 81 | self.lines[:] = new_lines 82 | return False 83 | logger.debug( 84 | f"Found vectorcode hook block between line {start} and {end} in {self.path}:\n{''.join(self.lines[start + 1 : end])}" 85 | ) 86 | return True 87 | 88 | return False 89 | 90 | def inject_hook(self, content: list[str], force: bool = False): 91 | if len(self.lines) == 0 or not self.has_vectorcode_hooks(force): 92 | self.lines.append(self.prefix + "\n") 93 | self.lines.extend(i if i.endswith("\n") else i + "\n" for i in content) 94 | self.lines.append(self.suffix + "\n") 95 | with open(self.path, "w") as fin: 96 | if os.path.islink(self.path): # pragma: nocover 97 | logger.warning(f"{self.path} is a symlink.") 98 | fin.writelines(self.lines) 99 | if platform.system() != "Windows": 100 | # for unix systems, set the executable bit. 
101 | curr_mode = os.stat(self.path).st_mode 102 | os.chmod(self.path, mode=curr_mode | stat.S_IXUSR) 103 | 104 | 105 | async def init(configs: Config) -> int: 106 | assert configs.project_root is not None 107 | project_config_dir = os.path.join(str(configs.project_root), ".vectorcode") 108 | is_initialised = 0 109 | if os.path.isdir(project_config_dir) and not configs.force: 110 | logger.warning( 111 | f"{configs.project_root} is already initialised for VectorCode.", 112 | ) 113 | is_initialised = 1 114 | else: 115 | os.makedirs(project_config_dir, exist_ok=True) 116 | for item in ( 117 | "config.json5", 118 | "config.json", 119 | "vectorcode.include", 120 | "vectorcode.exclude", 121 | ): 122 | local_file_path = os.path.join(project_config_dir, item) 123 | global_file_path = os.path.join( 124 | os.path.expanduser("~"), ".config", "vectorcode", item 125 | ) 126 | if os.path.isfile(global_file_path): 127 | logger.debug(f"Copying global {item} to {project_config_dir}") 128 | shutil.copyfile(global_file_path, local_file_path) 129 | 130 | print(f"VectorCode project root has been initialised at {configs.project_root}") 131 | print( 132 | "Note: The collection in the database will not be created until you vectorise a file." 133 | ) 134 | 135 | git_root = find_project_root(configs.project_root, ".git") 136 | if git_root: 137 | load_hooks() 138 | for hook in __HOOK_CONTENTS.keys(): 139 | hook_file_path = os.path.join(git_root, ".git", "hooks", hook) 140 | logger.info(f"Writing {hook} hook into {hook_file_path}.") 141 | print(f"Processing {hook} hook...") 142 | hook_obj = HookFile(hook_file_path, git_dir=git_root) 143 | hook_obj.inject_hook(__HOOK_CONTENTS[hook], configs.force) 144 | 145 | return is_initialised 146 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/vectorise_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | local logger = vc_config.logger 7 | 8 | ---@alias VectoriseToolArgs { paths: string[], project_root: string? 
} 9 | 10 | ---@alias VectoriseResult { add: integer, update: integer, removed: integer } 11 | 12 | ---@type VectorCode.CodeCompanion.VectoriseToolOpts 13 | local default_vectorise_options = { 14 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 15 | } 16 | 17 | ---@param opts VectorCode.CodeCompanion.VectoriseToolOpts|{}|nil 18 | ---@return VectorCode.CodeCompanion.VectoriseToolOpts 19 | local get_vectorise_tool_opts = function(opts) 20 | opts = vim.tbl_deep_extend("force", default_vectorise_options, opts or {}) 21 | logger.info( 22 | string.format( 23 | "Loading `vectorcode_vectorise` with the following opts:\n%s", 24 | vim.inspect(opts) 25 | ) 26 | ) 27 | return opts 28 | end 29 | 30 | ---@param opts VectorCode.CodeCompanion.VectoriseToolOpts|{}|nil 31 | ---@return CodeCompanion.Tools 32 | return function(opts) 33 | opts = get_vectorise_tool_opts(opts) 34 | local tool_name = "vectorcode_vectorise" 35 | local job_runner = cc_common.initialise_runner(opts.use_lsp) 36 | 37 | ---@type CodeCompanion.Tools|{} 38 | return { 39 | name = tool_name, 40 | schema = { 41 | type = "function", 42 | ["function"] = { 43 | name = tool_name, 44 | description = [[ 45 | Vectorise files in a project so that they'll be available from the `vectorcode_query` tool. 46 | The paths should be accurate (DO NOT ASSUME A PATH EXISTS) and are case-sensitive. 47 | ]], 48 | parameters = { 49 | type = "object", 50 | properties = { 51 | paths = { 52 | type = "array", 53 | items = { type = "string" }, 54 | description = "Paths to the files to be vectorised. DO NOT use directories for this parameter. You may use wildcards here if the user instructed to do so.", 55 | }, 56 | project_root = { 57 | type = "string", 58 | description = [[ 59 | The project that the files belong to. 60 | The value should be one of the following: 61 | - One of the paths from the `vectorcode_ls` tool; 62 | - User input; 63 | - `null` (omit this parameter), which means the current project, if found. 64 | ]], 65 | }, 66 | }, 67 | required = { "paths" }, 68 | }, 69 | }, 70 | }, 71 | cmds = { 72 | ---@param tools CodeCompanion.Tools 73 | ---@param action VectoriseToolArgs 74 | ---@return nil|{ status: string, data: string } 75 | function(tools, action, _, cb) 76 | local args = { "vectorise", "--pipe" } 77 | action = utils.fix_nil(action) 78 | if action.project_root then 79 | local project_root = vim.fs.abspath(vim.fs.normalize(action.project_root)) 80 | if utils.is_directory(project_root) then 81 | vim.list_extend(args, { "--project_root", project_root }) 82 | else 83 | return { status = "error", data = "Invalid path " ..
project_root } 84 | end 85 | end 86 | if 87 | vim.iter(action.paths):any(function(p) 88 | return utils.is_directory(p) 89 | end) 90 | then 91 | return { 92 | status = "error", 93 | data = "Please only supply paths to files as the `paths` parameter, not directories.", 94 | } 95 | end 96 | 97 | vim.list_extend(args, action.paths) 98 | job_runner.run_async( 99 | args, 100 | ---@param result VectoriseResult 101 | function(result, error, code, _) 102 | if result then 103 | cb({ status = "success", data = result }) 104 | else 105 | cb({ status = "error", data = { error = error, code = code } }) 106 | end 107 | end, 108 | tools.chat.bufnr 109 | ) 110 | end, 111 | }, 112 | output = { 113 | ---@param self CodeCompanion.Tools.Tool 114 | prompt = function(self, _) 115 | return string.format("Vectorise %d files with VectorCode?", #self.args.paths) 116 | end, 117 | ---@param self CodeCompanion.Tools.Tool 118 | ---@param tools CodeCompanion.Tools 119 | ---@param cmd VectoriseToolArgs 120 | error = function(self, tools, cmd, stderr) 121 | logger.error( 122 | ("CodeCompanion tool with command %s thrown with the following error: %s"):format( 123 | vim.inspect(cmd), 124 | vim.inspect(stderr) 125 | ) 126 | ) 127 | stderr = utils.flatten_table_to_string(stderr, "Unknown error.") 128 | tools.chat:add_tool_output( 129 | self, 130 | string.format("**VectorCode `vectorise` Tool: %s", stderr) 131 | ) 132 | end, 133 | ---@param self CodeCompanion.Tools.Tool 134 | ---@param tools CodeCompanion.Tools 135 | ---@param cmd VectoriseToolArgs 136 | ---@param stdout VectorCode.VectoriseResult[] 137 | success = function(self, tools, cmd, stdout) 138 | stdout = stdout[#stdout] 139 | tools.chat:add_tool_output( 140 | self, 141 | string.format( 142 | [[**VectorCode `vectorise` Tool**: 143 | - New files added: %d 144 | - Existing files updated: %d 145 | - Orphaned files removed: %d 146 | - Up-to-date files skipped: %d 147 | - Failed to decode: %d 148 | ]], 149 | stdout.add or 0, 150 | stdout.update or 0, 151 | stdout.removed or 0, 152 | stdout.skipped or 0, 153 | stdout.failed or 0 154 | ) 155 | ) 156 | if cmd.project_root and cmd.project_root then 157 | tools.chat:add_tool_output( 158 | self, 159 | string.format("\nThe files were added to `%s`", cmd.project_root), 160 | "" 161 | ) 162 | end 163 | end, 164 | }, 165 | } 166 | end 167 | -------------------------------------------------------------------------------- /lua/vectorcode/utils.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local function traverse(node, cb) 4 | if node == nil then 5 | return 6 | end 7 | if node.result ~= nil then 8 | traverse(node.result, cb) 9 | end 10 | if vim.isarray(node) then 11 | for _, v in pairs(node) do 12 | traverse(v, cb) 13 | end 14 | return 15 | end 16 | if vim.isarray(node.children) then 17 | for _, v in pairs(node.children) do 18 | traverse(v, cb) 19 | end 20 | end 21 | if not vim.list_contains({ 15, 16, 20, 21, 25 }, node.kind) then 22 | -- exclude certain kinds. 23 | if cb then 24 | cb(node) 25 | end 26 | end 27 | end 28 | 29 | ---@alias VectorCode.QueryCallback fun(bufnr:integer?):string|string[] 30 | 31 | ---Retrieves all LSP document symbols from the current buffer, and use the symbols 32 | ---as query messages. Fallbacks to `make_surrounding_lines_cb` if 33 | ---`textDocument_documentSymbol` is not accessible. 
34 | ---@return VectorCode.QueryCallback 35 | function M.make_lsp_document_symbol_cb() 36 | return function(bufnr) 37 | if bufnr == 0 or bufnr == nil then 38 | bufnr = vim.api.nvim_get_current_buf() 39 | end 40 | local has_documentSymbol = false 41 | for _, client in ipairs(vim.lsp.get_clients({ bufnr = bufnr })) do 42 | if client.server_capabilities.documentSymbolProvider then 43 | has_documentSymbol = true 44 | end 45 | end 46 | if not has_documentSymbol then 47 | return M.make_surrounding_lines_cb(-1)(bufnr) 48 | end 49 | 50 | local result, _ = vim.lsp.buf_request_sync( 51 | 0, 52 | vim.lsp.protocol.Methods.textDocument_documentSymbol, 53 | { textDocument = vim.lsp.util.make_text_document_params(bufnr) } 54 | ) 55 | if result ~= nil then 56 | local symbols = {} 57 | traverse(result, function(node) 58 | if node.name ~= nil then 59 | vim.list_extend(symbols, { node.name }) 60 | end 61 | end) 62 | return symbols 63 | else 64 | return M.make_surrounding_lines_cb(20)(bufnr) 65 | end 66 | end 67 | end 68 | 69 | ---Use the lines above and below the current line as the query messages. 70 | ---@param num_of_lines integer The number of lines to include in the query. 71 | ---@return VectorCode.QueryCallback 72 | function M.make_surrounding_lines_cb(num_of_lines) 73 | return function(bufnr) 74 | if bufnr == 0 or bufnr == nil then 75 | bufnr = vim.api.nvim_get_current_buf() 76 | end 77 | if num_of_lines <= 0 then 78 | return table.concat(vim.api.nvim_buf_get_lines(bufnr, 0, -1, false), "\n") 79 | end 80 | local cursor_line = vim.api.nvim_win_get_cursor(0)[1] 81 | local start_line = cursor_line - math.floor(num_of_lines / 2) 82 | if start_line < 1 then 83 | start_line = 1 84 | end 85 | return table.concat( 86 | vim.api.nvim_buf_get_lines( 87 | bufnr, 88 | start_line - 1, 89 | start_line + num_of_lines - 1, 90 | false 91 | ), 92 | "\n" 93 | ) 94 | end 95 | end 96 | 97 | ---@param path string|integer 98 | ---@return string? 99 | function M.find_root(path) 100 | return vim.fs.root(path, ".vectorcode") or vim.fs.root(path, ".git") 101 | end 102 | 103 | ---@param str string 104 | ---@param sep string? 105 | ---@return string[] 106 | local function split(str, sep) 107 | if sep == nil then 108 | sep = " " 109 | end 110 | local result = {} 111 | local pattern = "([^" .. sep .. "]+)" 112 | for part in string.gmatch(str, pattern) do 113 | table.insert(result, part) 114 | end 115 | return result 116 | end 117 | 118 | --- This function build a `VectorCode.QueryCallback` by extracting recent changes from the `:changes` command. 119 | ---@param max_num integer? Default is 50 120 | ---@return VectorCode.QueryCallback 121 | function M.make_changes_cb(max_num) 122 | if max_num == nil then 123 | max_num = 50 124 | end 125 | return function(bufnr) 126 | ---@type string? 
127 | local raw_changes = vim.api.nvim_exec2("changes", { output = true }).output 128 | if raw_changes == nil then 129 | -- fallback to other cb 130 | return M.make_surrounding_lines_cb(-1)(bufnr) 131 | end 132 | local lines = vim.tbl_map(function(s) 133 | local res = string.gsub(s, "^[%d%s]+", "") 134 | return res 135 | end, split(raw_changes, "\n")) 136 | local results = {} 137 | local seen = {} -- deduplicate 138 | for i = #lines - 1, 2, -1 do 139 | if #results <= max_num then 140 | if not seen[lines[i]] then 141 | table.insert(results, lines[i]) 142 | seen[lines[i]] = true 143 | end 144 | else 145 | break 146 | end 147 | end 148 | if #results == 0 then 149 | -- fallback to other cb 150 | return M.make_surrounding_lines_cb(-1)(bufnr) 151 | end 152 | return results 153 | end 154 | end 155 | 156 | ---@param f string 157 | ---@return boolean 158 | function M.is_file(f) 159 | if type(f) ~= "string" then 160 | return false 161 | end 162 | local stats = vim.uv.fs_stat(f) 163 | return stats and (stats.type == "file") or false 164 | end 165 | 166 | ---@param f string 167 | ---@return boolean 168 | function M.is_directory(f) 169 | if type(f) ~= "string" then 170 | return false 171 | end 172 | local stats = vim.uv.fs_stat(f) 173 | return stats and (stats.type == "directory") or false 174 | end 175 | 176 | ---@param t table|string|nil 177 | ---@param fallback string? 178 | ---@return string 179 | M.flatten_table_to_string = function(t, fallback) 180 | fallback = fallback or "" 181 | if t == nil then 182 | return fallback 183 | end 184 | if type(t) == "string" then 185 | return t 186 | end 187 | 188 | -- Handle empty tables or tables with empty strings 189 | local flattened = vim 190 | .iter(t) 191 | :flatten(math.huge) 192 | :filter(function(item) 193 | return type(item) == "string" and vim.trim(item) ~= "" 194 | end) 195 | :totable() 196 | 197 | if #flattened == 0 then 198 | return fallback 199 | end 200 | 201 | return table.concat(flattened, "\n") 202 | end 203 | 204 | ---Convert any `vim.NIL` instances to `nil` in lua. 205 | ---@generic Obj: any 206 | ---@param obj Obj 207 | ---@return Obj 208 | function M.fix_nil(obj) 209 | if obj == vim.NIL then 210 | return nil 211 | end 212 | if type(obj) == "table" then 213 | for k, v in pairs(obj) do 214 | obj[k] = M.fix_nil(v) 215 | end 216 | end 217 | return obj 218 | end 219 | 220 | return M 221 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VectorCode 2 | 3 | [![codecov](https://codecov.io/github/Davidyz/VectorCode/branch/main/graph/badge.svg?token=TWXLOUGG66)](https://codecov.io/github/Davidyz/VectorCode) 4 | [![Test and Coverage](https://github.com/Davidyz/VectorCode/actions/workflows/test_and_cov.yml/badge.svg)](https://github.com/Davidyz/VectorCode/actions/workflows/test_and_cov.yml) 5 | [![pypi](https://img.shields.io/pypi/v/vectorcode.svg)](https://pypi.org/project/vectorcode/) 6 | 7 | VectorCode is a code repository indexing tool. It helps you build better prompt 8 | for your coding LLMs by indexing and providing information about the code 9 | repository you're working on. This repository also contains the corresponding 10 | neovim plugin that provides a set of APIs for you to build or enhance AI plugins, 11 | and integrations for some of the popular plugins. 12 | 13 | > [!NOTE] 14 | > This project is in beta quality and is undergoing rapid iterations. 
15 | > I know there is plenty of room for improvement, and any help is welcome. 16 | 17 |  18 |  19 | * [Why VectorCode?](#why-vectorcode) 20 | * [Documentation](#documentation) 21 | * [About Versioning](#about-versioning) 22 | * [TODOs](#todos) 23 | * [Credit](#credit) 24 | * [Special Thanks](#special-thanks) 25 | * [Star History](#star-history) 26 | 27 |  28 |  29 | ## Why VectorCode? 30 | LLMs usually have very limited understanding of closed-source projects, projects 31 | that are not well-known, and cutting-edge developments that have not made it into 32 | releases. Their capabilities on these projects are quite limited. With 33 | VectorCode, you can easily (and programmatically) inject task-relevant context 34 | from the project into the prompt. This significantly improves the quality of the 35 | model output and reduces hallucinations. 36 | 37 | [![asciicast](https://asciinema.org/a/8WP8QJHNAR9lEllZSSx3poLPD.svg)](https://asciinema.org/a/8WP8QJHNAR9lEllZSSx3poLPD?t=3) 38 | 39 | ## Documentation 40 | 41 | > [!NOTE] 42 | > The documentation on the `main` branch reflects the code on the latest commit. 43 | > To see the documentation for the version you're using, you can [check out 44 | > the corresponding tags](https://github.com/Davidyz/VectorCode/tags). 45 | 46 | - For the setup and usage of the command-line tool, see [the CLI documentation](./docs/cli.md); 47 | - For neovim users, after you've gone through the CLI documentation, please refer to 48 | [the neovim plugin documentation](./docs/neovim/README.md) (and optionally the [lua API reference](./docs/neovim/api_references.md)) 49 | for further instructions. 50 | - Additional resources: 51 | - the [wiki](https://github.com/Davidyz/VectorCode/wiki) for extra tricks and 52 | tips that will help you get the most out of VectorCode; 53 | - the [discussions](https://github.com/Davidyz/VectorCode/discussions) where 54 | you can ask general questions and share your cool use cases for VectorCode. 55 | - If you're feeling adventurous, feel free to check out 56 | [the pull requests](https://github.com/Davidyz/VectorCode/pulls) for 57 | WIP features. 58 | 59 | If you're trying to contribute to this project, take a look at [the contribution 60 | guide](./docs/CONTRIBUTING.md), which contains information about some basic 61 | guidelines that you should follow and tips that you may find helpful. 62 | 63 | ### About Versioning 64 | 65 | This project follows an adapted semantic versioning: 66 | 67 | - Until 1.0.0 is released, the _major version number_ stays at 0, which indicates that 68 | this project is still in an early stage, and features/interfaces may change from 69 | time to time; 70 | - The _minor version number_ indicates __breaking changes__. When I decide to remove a 71 | feature/config option, the actual removal will happen when I bump the minor 72 | version number. Therefore, if you want to avoid breaking a working setup, you 73 | may choose to use a version constraint like `"vectorcode<0.7.0"`; 74 | - The _patch version number_ indicates __non-breaking changes__. This can include new 75 | features and bug fixes. When I decide to deprecate things, I will make a new 76 | release with a bumped patch version. Until the minor version number is bumped, 77 | the deprecated feature will still work but you'll see a warning. It's 78 | recommended to update your setup to adopt the new features.
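For example, if you install the CLI from PyPI, the constraint above can be applied directly at install time. This is a minimal sketch assuming a plain `pip` install; the same constraint string works with `pipx` or any other standard Python dependency specification:

```sh
# Stay below the next breaking (minor) release of the CLI.
pip install "vectorcode<0.7.0"
```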
79 | 80 | ## TODOs 81 | - [x] query by ~file path~ excluded paths; 82 | - [x] chunking support; 83 | - [x] add metadata for files; 84 | - [x] chunk-size configuration; 85 | - [x] smarter chunking (semantics/syntax based), implemented with 86 | [py-tree-sitter](https://github.com/tree-sitter/py-tree-sitter) and 87 | [tree-sitter-language-pack](https://github.com/Goldziher/tree-sitter-language-pack); 88 | - [x] configurable document selection from query results. 89 | - [x] ~NeoVim Lua API with cache to skip the retrieval when a project has not 90 | been indexed~ Returns empty array instead; 91 | - [x] job pool for async caching; 92 | - [x] [persistent-client](https://docs.trychroma.com/docs/run-chroma/persistent-client); 93 | - [ ] proper [remote Chromadb](https://docs.trychroma.com/production/administration/auth) support (with authentication, etc.); 94 | - [x] respect `.gitignore`; 95 | - [x] implement some sort of project-root anchors (such as `.git` or a custom 96 | `.vectorcode.json`) that enhances automatic project-root detection. 97 | **Implemented project-level `.vectorcode/` and `.git` as root anchor** 98 | - [x] ability to view and delete files in a collection; 99 | - [x] joint search (kinda, using codecompanion.nvim/MCP); 100 | - [x] Nix support (unofficial packages [here](https://search.nixos.org/packages?channel=unstable&from=0&size=50&sort=relevance&type=packages&query=vectorcode)); 101 | - [ ] Query rewriting (#124). 102 | 103 | 104 | ## Credit 105 | 106 | - [@milanglacier](https://github.com/milanglacier) (and [minuet-ai.nvim](https://github.com/milanglacier/minuet-ai.nvim)) for the support when this project was still in early stage; 107 | - [@olimorris](https://github.com/olimorris) for the help (personally and 108 | from [codecompanion.nvim](https://github.com/olimorris/codecompanion.nvim)) 109 | when this project made initial attempts at tool-calling; 110 | - [@ravitemer](https://github.com/ravitemer) for the help to interface 111 | VectorCode with [MCP](https://modelcontextprotocol.io/introduction); 112 | - The nix community (especially [@sarahec](https://github.com/sarahec) and [@GaetanLepage](https://github.com/GaetanLepage)) 113 | for maintaining the nix packages. 
114 | 115 | ### Special Thanks 116 | [![JetBrains logo.](https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg)](https://jb.gg/OpenSource) 117 | 118 | ## Star History 119 | 120 | [![Star History Chart](https://api.star-history.com/svg?repos=Davidyz/VectorCode&type=Date)](https://www.star-history.com/#Davidyz/VectorCode&Date) 121 | -------------------------------------------------------------------------------- /lua/vectorcode/config.lua: -------------------------------------------------------------------------------- 1 | local log_level = os.getenv("VECTORCODE_NVIM_LOG_LEVEL") 2 | if log_level == nil then 3 | log_level = "error" 4 | else 5 | log_level = log_level:lower() 6 | end 7 | local logger = require("plenary.log").new({ 8 | plugin = "vectorcode.nvim", 9 | level = log_level, 10 | use_console = log_level ~= nil and "async" or false, 11 | use_file = log_level ~= nil, 12 | }) 13 | 14 | local cacher = nil 15 | 16 | ---@type VectorCode.Opts 17 | local config = { 18 | cli_cmds = { 19 | vectorcode = "vectorcode", 20 | }, 21 | async_opts = { 22 | debounce = 10, 23 | events = { "BufWritePost", "InsertEnter", "BufReadPost" }, 24 | exclude_this = true, 25 | n_query = 1, 26 | notify = false, 27 | query_cb = require("vectorcode.utils").make_surrounding_lines_cb(-1), 28 | run_on_register = false, 29 | single_job = false, 30 | timeout_ms = 5000, 31 | }, 32 | async_backend = "default", 33 | exclude_this = true, 34 | n_query = 1, 35 | notify = true, 36 | timeout_ms = 5000, 37 | on_setup = { update = false, lsp = false }, 38 | sync_log_env_var = false, 39 | } 40 | 41 | local setup_config = vim.deepcopy(config, true) 42 | 43 | ---@return vim.lsp.ClientConfig 44 | local lsp_configs = function() 45 | ---@type vim.lsp.ClientConfig 46 | local cfg = 47 | { cmd = { "vectorcode-server" }, root_markers = { ".vectorcode", ".git" } } -- NOTE: This can be overriden by `vim.lsp.config` 48 | if vim.lsp.config ~= nil and vim.lsp.config.vectorcode_server ~= nil then 49 | -- nvim >= 0.11.0 50 | cfg = vim.tbl_deep_extend("force", cfg, vim.lsp.config.vectorcode_server) 51 | logger.debug("Using vim.lsp.config.vectorcode_server for LSP config:\n", cfg) 52 | end 53 | cfg.name = "vectorcode_server" 54 | if setup_config.sync_log_env_var then 55 | local level = os.getenv("VECTORCODE_NVIM_LOG_LEVEL") or nil 56 | if level ~= nil then 57 | level = string.upper(level) 58 | if level == "TRACE" then 59 | -- there's no `TRACE` in python logging 60 | level = "DEBUG" 61 | end 62 | cfg.cmd_env["VECTORCODE_LOG_LEVEL"] = level 63 | end 64 | end 65 | return cfg 66 | end 67 | 68 | local notify_opts = { title = "VectorCode" } 69 | 70 | ---@param opts {notify:boolean}? 71 | local has_cli = function(opts) 72 | opts = opts or { notify = false } 73 | local ok = vim.fn.executable(setup_config.cli_cmds.vectorcode) == 1 74 | if not ok and opts.notify then 75 | vim.notify("VectorCode CLI is not executable!", vim.log.levels.ERROR, notify_opts) 76 | end 77 | return ok 78 | end 79 | 80 | ---@generic T: function 81 | ---@param func T 82 | ---@return T 83 | local check_cli_wrap = function(func) 84 | if not has_cli() then 85 | vim.notify("VectorCode CLI is not executable!", vim.log.levels.ERROR, notify_opts) 86 | end 87 | return func 88 | end 89 | 90 | --- Handles startup actions. 
91 | ---@param configs VectorCode.Opts 92 | local startup_handler = check_cli_wrap(function(configs) 93 | if configs.on_setup.update then 94 | require("vectorcode").check("config", function(out) 95 | if out.code == 0 then 96 | local path = string.gsub(out.stdout, "^%s*(.-)%s*$", "%1") 97 | if path ~= "" then 98 | logger.info("Running `vectorcode update` on start up.") 99 | require("vectorcode").update(path) 100 | end 101 | end 102 | end) 103 | end 104 | if configs.on_setup.lsp then 105 | local ok, runner = pcall(require, "vectorcode.jobrunner.lsp") 106 | if not ok or not type(runner) == "table" or runner == nil then 107 | vim.notify("Failed to start vectorcode-server.", vim.log.levels.WARN, notify_opts) 108 | logger.error("Failed to start vectorcode-server.") 109 | return 110 | end 111 | runner.init() 112 | end 113 | end) 114 | 115 | return { 116 | get_default_config = function() 117 | return vim.deepcopy(config, true) 118 | end, 119 | 120 | setup = check_cli_wrap( 121 | ---@param opts VectorCode.Opts? 122 | function(opts) 123 | logger.info("Received setup opts:\n", opts) 124 | opts = opts or {} 125 | setup_config = vim.tbl_deep_extend("force", config, opts or {}) 126 | for k, _ in pairs(setup_config.async_opts) do 127 | if 128 | setup_config[k] ~= nil 129 | and (opts.async_opts == nil or opts.async_opts[k] == nil) 130 | then 131 | -- NOTE: a lot of options are mutual between `setup_config` and `async_opts`. 132 | -- If users do not explicitly set them `async_opts`, copy them from `setup_config`. 133 | setup_config.async_opts = vim.tbl_deep_extend( 134 | "force", 135 | setup_config.async_opts, 136 | { [k] = setup_config[k] } 137 | ) 138 | end 139 | end 140 | setup_config.cli_cmds.vectorcode = 141 | vim.fs.normalize(setup_config.cli_cmds.vectorcode) 142 | startup_handler(setup_config) 143 | logger.info("Finished processing opts:\n", setup_config) 144 | end 145 | ), 146 | 147 | ---@return VectorCode.CacheBackend 148 | get_cacher_backend = function() 149 | if cacher ~= nil then 150 | return cacher 151 | end 152 | if setup_config.async_backend == "lsp" then 153 | local ok, lsp_cacher = pcall(require, "vectorcode.cacher.lsp") 154 | if ok and type(lsp_cacher) == "table" then 155 | logger.debug("Using LSP backend for cacher.") 156 | cacher = lsp_cacher 157 | return cacher 158 | else 159 | vim.notify("Falling back to default backend.", vim.log.levels.WARN, notify_opts) 160 | logger.warn("Fallback to default (cmd) backend for cacher.") 161 | setup_config.async_backend = "default" 162 | end 163 | end 164 | 165 | if setup_config.async_backend ~= "default" then 166 | vim.notify( 167 | ("Unrecognised vectorcode backend: %s! 
Falling back to `default`."):format( 168 | setup_config.async_backend 169 | ), 170 | vim.log.levels.ERROR, 171 | notify_opts 172 | ) 173 | logger.warn("Fallback to default (cmd) backend for cacher.") 174 | setup_config.async_backend = "default" 175 | end 176 | logger.debug("Defaulting to cmd backend for cacher.") 177 | cacher = require("vectorcode.cacher.default") 178 | return cacher 179 | end, 180 | 181 | ---@return VectorCode.Opts 182 | get_user_config = function() 183 | return vim.deepcopy(setup_config, true) 184 | end, 185 | ---@return VectorCode.QueryOpts 186 | get_query_opts = function() 187 | return { 188 | exclude_this = setup_config.exclude_this, 189 | n_query = setup_config.n_query, 190 | notify = setup_config.notify, 191 | timeout_ms = setup_config.timeout_ms, 192 | } 193 | end, 194 | notify_opts = notify_opts, 195 | 196 | ---@return boolean 197 | has_cli = has_cli, 198 | 199 | check_cli_wrap = check_cli_wrap, 200 | 201 | lsp_configs = lsp_configs, 202 | logger = logger, 203 | } 204 | -------------------------------------------------------------------------------- /tests/subcommands/test_ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | import socket 3 | from unittest.mock import AsyncMock, MagicMock, patch 4 | 5 | import pytest 6 | import tabulate 7 | 8 | from vectorcode.cli_utils import Config 9 | from vectorcode.subcommands.ls import get_collection_list, ls 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_get_collection_list(): 14 | mock_client = AsyncMock() 15 | mock_collection1 = AsyncMock() 16 | mock_collection1.name = "test_collection_1" 17 | mock_collection1.metadata = { 18 | "path": "/test/path1", 19 | "username": "test_user", 20 | "embedding_function": "test_ef", 21 | } 22 | mock_collection1.count.return_value = 100 23 | mock_collection1.get.return_value = { 24 | "metadatas": [ 25 | {"path": "/test/path1/file1.txt"}, 26 | {"path": "/test/path1/file2.txt"}, 27 | None, 28 | ] 29 | } 30 | mock_collection2 = AsyncMock() 31 | mock_collection2.name = "test_collection_2" 32 | mock_collection2.metadata = { 33 | "path": "/test/path2", 34 | "username": "test_user", 35 | "embedding_function": "test_ef", 36 | } 37 | mock_collection2.count.return_value = 200 38 | mock_collection2.get.return_value = { 39 | "metadatas": [ 40 | {"path": "/test/path2/file1.txt"}, 41 | {"path": "/test/path2/file2.txt"}, 42 | ] 43 | } 44 | 45 | async def mock_get_collections(client): 46 | yield mock_collection1 47 | yield mock_collection2 48 | 49 | with patch("vectorcode.subcommands.ls.get_collections", new=mock_get_collections): 50 | result = await get_collection_list(mock_client) 51 | 52 | assert len(result) == 2 53 | assert result[0]["project-root"] == "/test/path1" 54 | assert result[0]["user"] == "test_user" 55 | assert result[0]["hostname"] == socket.gethostname() 56 | assert result[0]["collection_name"] == "test_collection_1" 57 | assert result[0]["size"] == 100 58 | assert result[0]["embedding_function"] == "test_ef" 59 | assert result[0]["num_files"] == 2 60 | assert result[1]["num_files"] == 2 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_ls_pipe_mode(capsys): 65 | mock_client = AsyncMock() 66 | mock_collection = AsyncMock() 67 | mock_collection.name = "test_collection" 68 | mock_collection.metadata = { 69 | "path": "/test/path", 70 | "username": "test_user", 71 | "embedding_function": "test_ef", 72 | } 73 | mock_collection.count.return_value = 50 74 | mock_collection.get.return_value = {"metadatas": [{"path": 
"/test/path/file.txt"}]} 75 | 76 | async def mock_get_collections(client): 77 | yield mock_collection 78 | 79 | with ( 80 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 81 | patch( 82 | "vectorcode.subcommands.ls.get_collection_list", 83 | return_value=[ 84 | { 85 | "project-root": "/test/path", 86 | "size": 50, 87 | "num_files": 1, 88 | "embedding_function": "test_ef", 89 | } 90 | ], 91 | ), 92 | ): 93 | mock_client = MagicMock() 94 | mock_client_manager = MockClientManager.return_value 95 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 96 | 97 | config = Config(pipe=True) 98 | await ls(config) 99 | captured = capsys.readouterr() 100 | expected_output = ( 101 | json.dumps( 102 | [ 103 | { 104 | "project-root": "/test/path", 105 | "size": 50, 106 | "num_files": 1, 107 | "embedding_function": "test_ef", 108 | } 109 | ] 110 | ) 111 | + "\n" 112 | ) 113 | assert captured.out == expected_output 114 | 115 | 116 | @pytest.mark.asyncio 117 | async def test_ls_table_mode(capsys, monkeypatch): 118 | mock_client = AsyncMock() 119 | mock_collection = AsyncMock() 120 | mock_collection.name = "test_collection" 121 | mock_collection.metadata = { 122 | "path": "/test/path", 123 | "username": "test_user", 124 | "embedding_function": "test_ef", 125 | } 126 | mock_collection.count.return_value = 50 127 | mock_collection.get.return_value = {"metadatas": [{"path": "/test/path/file.txt"}]} 128 | 129 | async def mock_get_collections(client): 130 | yield mock_collection 131 | 132 | with ( 133 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 134 | patch( 135 | "vectorcode.subcommands.ls.get_collection_list", 136 | return_value=[ 137 | { 138 | "project-root": "/test/path", 139 | "size": 50, 140 | "num_files": 1, 141 | "embedding_function": "test_ef", 142 | } 143 | ], 144 | ), 145 | ): 146 | mock_client = MagicMock() 147 | mock_client_manager = MockClientManager.return_value 148 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 149 | 150 | config = Config(pipe=False) 151 | await ls(config) 152 | captured = capsys.readouterr() 153 | expected_output = ( 154 | tabulate.tabulate( 155 | [["/test/path", 50, 1, "test_ef"]], 156 | headers=[ 157 | "Project Root", 158 | "Collection Size", 159 | "Number of Files", 160 | "Embedding Function", 161 | ], 162 | ) 163 | + "\n" 164 | ) 165 | assert captured.out == expected_output 166 | 167 | # Test with HOME environment variable set 168 | monkeypatch.setenv("HOME", "/test") 169 | with ( 170 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 171 | patch( 172 | "vectorcode.subcommands.ls.get_collection_list", 173 | return_value=[ 174 | { 175 | "project-root": "/test/path", 176 | "size": 50, 177 | "num_files": 1, 178 | "embedding_function": "test_ef", 179 | } 180 | ], 181 | ), 182 | ): 183 | mock_client = MagicMock() 184 | mock_client_manager = MockClientManager.return_value 185 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 186 | config = Config(pipe=False) 187 | await ls(config) 188 | captured = capsys.readouterr() 189 | expected_output = ( 190 | tabulate.tabulate( 191 | [["~/path", 50, 1, "test_ef"]], 192 | headers=[ 193 | "Project Root", 194 | "Collection Size", 195 | "Number of Files", 196 | "Embedding Function", 197 | ], 198 | ) 199 | + "\n" 200 | ) 201 | assert captured.out == expected_output 202 | -------------------------------------------------------------------------------- /lua/codecompanion/_extensions/vectorcode/init.lua: 
-------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | ---@alias sub_cmd "ls"|"query"|"vectorise"|"files_ls"|"files_rm" 4 | 5 | ---@class VectorCode.CodeCompanion.ExtensionOpts 6 | ---A table where the keys are the subcommand name (`ls`, `query`, `vectorise`, etc.) 7 | --- and the values are their config options. 8 | ---@field tool_opts? table 9 | ---Options related to the `vectorcode_toolbox` tool group 10 | ---@field tool_group? VectorCode.CodeCompanion.ToolGroupOpts 11 | ---Prompt library that automatically creates VectorCode collections on local files 12 | ---and set up prompts to let LLM search from certain directories. 13 | --- 14 | ---The keys should be the human-readable name of the prompt (as they'd appear in 15 | ---the action menu), and values would be `VectorCode.CodeCompanion.PromptFactory.Opts` 16 | ---objects. 17 | ---@field prompt_library? table 18 | 19 | local vc_config = require("vectorcode.config") 20 | local logger = vc_config.logger 21 | local utils = require("vectorcode.utils") 22 | 23 | ---@type VectorCode.CodeCompanion.ExtensionOpts|{} 24 | local default_extension_opts = { 25 | ---@type table 26 | tool_opts = { 27 | -- NOTE: the other default opts are defined in the source code files of the tools. 28 | -- `include_in_toolbox` is here so that the extension setup works as expected. 29 | ls = { include_in_toolbox = true }, 30 | query = { include_in_toolbox = true }, 31 | vectorise = { 32 | requires_approval = true, 33 | require_approval_before = true, 34 | include_in_toolbox = true, 35 | }, 36 | files_ls = {}, 37 | files_rm = { require_approval_before = true, requires_approval = true }, 38 | }, 39 | tool_group = { enabled = true, collapse = true, extras = {} }, 40 | prompt_library = require("vectorcode.integrations.codecompanion.prompts.presets"), 41 | } 42 | 43 | ---@type sub_cmd[] 44 | local valid_tools = { "ls", "query", "vectorise", "files_ls", "files_rm" } 45 | 46 | ---@param tool_opts table 47 | ---@return table 48 | local function merge_tool_opts(tool_opts) 49 | local wildcard_opts = tool_opts["*"] 50 | if wildcard_opts then 51 | for tool_name, opts in pairs(tool_opts) do 52 | if tool_name ~= "*" then 53 | tool_opts[tool_name] = vim.tbl_deep_extend("force", wildcard_opts, opts) 54 | end 55 | end 56 | tool_opts["*"] = nil 57 | end 58 | ---@cast tool_opts table 59 | return tool_opts 60 | end 61 | 62 | ---@type CodeCompanion.Extension 63 | local M = { 64 | ---@param opts VectorCode.CodeCompanion.ExtensionOpts 65 | setup = vc_config.check_cli_wrap(function(opts) 66 | if 67 | opts 68 | and opts.tool_opts 69 | and vim.iter(opts.tool_opts):any(function(_, v) 70 | return v.requires_approval ~= nil 71 | end) 72 | then 73 | vim.deprecate( 74 | "requires_approval", 75 | "require_approval_before", 76 | "1.0.0", 77 | "VectorCode", 78 | false 79 | ) 80 | end 81 | opts = vim.tbl_deep_extend("force", default_extension_opts, opts or {}) 82 | opts.tool_opts = merge_tool_opts(opts.tool_opts) 83 | logger.info("Received codecompanion extension opts:\n", opts) 84 | local cc_config = require("codecompanion.config").config 85 | local cc_integration = require("vectorcode.integrations").codecompanion 86 | local cc_chat_integration = cc_integration.chat 87 | 88 | local interactions = cc_config.strategies or cc_config.interactions 89 | for _, sub_cmd in pairs(valid_tools) do 90 | local tool_name = string.format("vectorcode_%s", sub_cmd) 91 | if interactions.chat.tools[tool_name] ~= nil then 92 | vim.notify( 93 | 
string.format( 94 | "There's an existing tool named `%s`. Please either remove it or rename it.", 95 | tool_name 96 | ), 97 | vim.log.levels.ERROR, 98 | vc_config.notify_opts 99 | ) 100 | logger.warn( 101 | string.format( 102 | "Not creating this tool because there is an existing tool named %s.", 103 | tool_name 104 | ) 105 | ) 106 | else 107 | local require_approval = opts.tool_opts[sub_cmd].requires_approval 108 | or opts.tool_opts[sub_cmd].require_approval_before 109 | 110 | interactions.chat.tools[tool_name] = { 111 | description = string.format("Run VectorCode %s tool", sub_cmd), 112 | callback = cc_chat_integration.make_tool(sub_cmd, opts.tool_opts[sub_cmd]), 113 | opts = { 114 | requires_approval = require_approval, 115 | require_approval_before = require_approval, 116 | }, 117 | } 118 | logger.info(string.format("%s tool has been created.", tool_name)) 119 | end 120 | end 121 | 122 | if opts.tool_group.enabled then 123 | local included_tools = vim 124 | .iter(valid_tools) 125 | :filter(function(cmd_name) 126 | return opts.tool_opts[cmd_name].include_in_toolbox 127 | end) 128 | :map(function(s) 129 | return "vectorcode_" .. s 130 | end) 131 | :totable() 132 | if opts.tool_group.extras and not vim.tbl_isempty(opts.tool_group.extras) then 133 | vim.list_extend(included_tools, opts.tool_group.extras) 134 | end 135 | logger.info( 136 | string.format( 137 | "Loading the following tools into `vectorcode_toolbox` tool group:\n%s", 138 | vim.inspect(included_tools) 139 | ) 140 | ) 141 | interactions.chat.tools.groups["vectorcode_toolbox"] = { 142 | opts = { collapse_tools = opts.tool_group.collapse }, 143 | description = "Use VectorCode to automatically build and retrieve repository-level context.", 144 | tools = included_tools, 145 | } 146 | end 147 | 148 | for name, prompt_opts in pairs(opts.prompt_library) do 149 | if prompt_opts.name ~= nil and prompt_opts.name ~= name then 150 | vim.notify( 151 | string.format( 152 | "The name of `%s` is inconsistent in the opts (`%s`).\nRenaming to `%s`.", 153 | name, 154 | prompt_opts.name, 155 | name 156 | ), 157 | vim.log.levels.WARN, 158 | vc_config.notify_opts 159 | ) 160 | end 161 | local project_root = prompt_opts.project_root 162 | if type(project_root) == "function" then 163 | project_root = project_root() 164 | end 165 | if not utils.is_directory(project_root) then 166 | vim.notify( 167 | string.format( 168 | "`%s` is not a valid directory for CodeCompanion prompt library.\nSkipping `%s`.", 169 | project_root, 170 | name 171 | ), 172 | vim.log.levels.WARN, 173 | vc_config.notify_opts 174 | ) 175 | else 176 | prompt_opts.name = name 177 | cc_config.prompt_library[name] = 178 | cc_chat_integration.prompts.register_prompt(prompt_opts) 179 | end 180 | end 181 | end), 182 | } 183 | 184 | return M 185 | -------------------------------------------------------------------------------- /lua/vectorcode/types.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | ---Type definition of the retrieval result. 4 | ---@class VectorCode.QueryResult 5 | ---@field path string Path to the file 6 | ---@field document string? Content of the file 7 | ---@field chunk string? 8 | ---@field start_line integer? 9 | ---@field end_line integer? 10 | ---@field chunk_id string? 11 | ---@field summary string? Used by the CodeCompanion tool only. 
Not part of the backend response 12 | 13 | ---@class VectorCode.LsResult 14 | ---@field project-root string 15 | 16 | ---@class VectorCode.VectoriseResult 17 | ---@field add integer 18 | ---@field update integer 19 | ---@field removed integer 20 | ---@field skipped integer 21 | ---@field failed integer 22 | 23 | ---Type definitions for the cache of a buffer. 24 | ---@class VectorCode.Cache 25 | ---@field enabled boolean Whether the async jobs are enabled or not. If the buffer is disabled, no cache will be generated for it. 26 | ---@field job_count integer 27 | ---@field jobs table Job handle:time of creation (in seconds) 28 | ---@field last_run integer? Last time the query ran, in seconds from epoch. 29 | ---@field options VectorCode.RegisterOpts The options that the buffer was registered with. 30 | ---@field retrieval VectorCode.QueryResult[]? The latest retrieval. 31 | 32 | ---Type definitions for options accepted by `query` API. 33 | ---@class VectorCode.QueryOpts 34 | ---@field exclude_this boolean? Whether to exclude the current buffer. Default: true 35 | ---@field n_query integer? Number of results. 36 | ---@field notify boolean? Notify on new results and other key moments. 37 | ---@field timeout_ms number? Timeout (in milliseconds) for running a vectorcode command. Default: 5000 38 | 39 | ---@class VectorCode.OnSetup Some actions that may be configured to run when `setup` is called. 40 | ---@field update boolean `vectorcode update` 41 | ---@field lsp boolean whether to start LSP server on startup (default is to delay it to the first LSP request) 42 | 43 | ---@class VectorCode.CliCmds Cli commands to use 44 | ---@field vectorcode string vectorcode cli command or full path 45 | 46 | ---Options passed to `setup`. 47 | ---@class VectorCode.Opts : VectorCode.QueryOpts 48 | ---@field async_opts VectorCode.RegisterOpts Default options to use for registering a new buffer for async cache. 49 | ---@field cli_cmds VectorCode.CliCmds 50 | ---@field on_setup VectorCode.OnSetup 51 | ---@field async_backend "default"|"lsp" 52 | ---@field sync_log_env_var boolean Whether to automatically set `VECTORCODE_LOG_LEVEL` when `VECTORCODE_NVIM_LOG_LEVEL` is detected. !! WARNING: THIS MAY RESULT IN EXCESSIVE LOG MESSAGES DUE TO STDERR BEING POPULATED BY CLI LOGS 53 | 54 | ---Options for the registration of an async cache for a buffer. 55 | ---@class VectorCode.RegisterOpts: VectorCode.QueryOpts 56 | ---@field debounce? integer Seconds. Default: 10 57 | ---@field events? string|string[] autocmd events that triggers async jobs. Default: `{"BufWritePost", "InsertEnter", "BufReadPost"}` 58 | ---@field single_job? boolean Whether to restrict to 1 async job per buffer. Default: false 59 | ---@field query_cb? VectorCode.QueryCallback Function that accepts the buffer ID and returns the query message(s). Default: `require("vectorcode.utils").make_surrounding_lines_cb(-1)` 60 | ---@field run_on_register? boolean Whether to run the query when registering. Default: false 61 | ---@field project_root? string 62 | 63 | ---A unified interface used by `lsp` backend and `default` backend 64 | ---@class VectorCode.CacheBackend 65 | ---@field register_buffer fun(bufnr: integer?, opts: VectorCode.RegisterOpts) Register a buffer and create an async cache for it. 66 | ---@field deregister_buffer fun(bufnr: integer?, opts: {notify: boolean}?) Deregister a buffer and destroy its async cache. 67 | ---@field query_from_cache fun(bufnr: integer?, opts: {notify: boolean}?): VectorCode.QueryResult[] Get the cached documents. 
68 | ---@field buf_is_registered fun(bufnr: integer?): boolean Checks if a buffer has been registered. 69 | ---@field buf_job_count fun(bufnr: integer?): integer Returns the number of running jobs in the background. 70 | ---@field buf_is_enabled fun(bufnr: integer?): boolean Checks if a buffer has been enabled. 71 | ---@field make_prompt_component fun(bufnr: integer?, component_cb: (fun(result: VectorCode.QueryResult): string)?): {content: string, count: integer} Compile the retrieval results into a string. 72 | ---@field async_check fun(check_item: string?, on_success: fun(out: vim.SystemCompleted)?, on_failure: fun(out: vim.SystemCompleted)?) Checks if VectorCode has been configured properly for your project. 73 | 74 | --- This class defines the options available to the CodeCompanion tool. 75 | ---@class VectorCode.CodeCompanion.ToolOpts 76 | --- Whether to use the LSP backend. Default: `false` 77 | ---@field use_lsp boolean? 78 | ---@field requires_approval boolean? 79 | ---@field require_approval_before boolean? 80 | --- Whether this tool should be included in `vectorcode_toolbox` 81 | ---@field include_in_toolbox boolean? 82 | 83 | ---@class VectorCode.CodeCompanion.LsToolOpts: VectorCode.CodeCompanion.ToolOpts 84 | 85 | ---@class VectorCode.CodeCompanion.FilesLsToolOpts: VectorCode.CodeCompanion.ToolOpts 86 | 87 | ---@class VectorCode.CodeCompanion.FilesRmToolOpts: VectorCode.CodeCompanion.ToolOpts 88 | 89 | ---@class VectorCode.CodeCompanion.QueryToolOpts: VectorCode.CodeCompanion.ToolOpts 90 | --- Maximum number of results provided to the LLM. 91 | --- You may set this to a table to configure different values for document/chunk mode. 92 | --- When set to negative values, it means unlimited. 93 | --- Default: `{ document = -1, chunk = -1 }` 94 | ---@field max_num integer|{document:integer, chunk: integer}|nil 95 | --- Default number of results provided to the LLM. 96 | --- This value is written in the system prompt and tool description. 97 | --- Users may ask the LLM to request a different number of results in the chat. 98 | --- You may set this to a table to configure different values for document/chunk mode. 99 | --- Default: `{ document = 10, chunk = 50 }` 100 | ---@field default_num? integer|{document:integer, chunk: integer} 101 | --- Whether to avoid duplicated references. Default: `true` 102 | ---@field no_duplicate boolean? 103 | --- Whether to send chunks instead of full files to the LLM. Default: `false` 104 | --- > Make sure you adjust `max_num` and `default_num` accordingly. 105 | ---@field chunk_mode? boolean 106 | ---@field summarise? VectorCode.CodeCompanion.SummariseOpts 107 | 108 | ---@class VectorCode.CodeCompanion.VectoriseToolOpts: VectorCode.CodeCompanion.ToolOpts 109 | 110 | ---@class VectorCode.CodeCompanion.ToolGroupOpts 111 | ---Whether to register the tool group 112 | ---@field enabled? boolean 113 | ---Whether to show the individual tools in the references 114 | ---@field collapse? boolean 115 | ---Other tools that you'd like to include in `vectorcode_toolbox` 116 | ---@field extras? string[] 117 | 118 | --- The result of the query tool should be structured in the following table 119 | ---@class VectorCode.CodeCompanion.QueryToolResult 120 | ---@field raw_results VectorCode.QueryResult[] 121 | ---@field count integer 122 | ---@field summary? string 123 | 124 | ---@class VectorCode.CodeCompanion.SummariseOpts 125 | ---A boolean flag that controls whether summarisation should be enabled. 126 | ---This can also be a function that returns a boolean. 
127 | ---In this case, you can use this option to dynamically control whether summarisation is enabled during a chat. 128 | --- 129 | ---This function receives 2 parameters: 130 | --- - `CodeCompanion.Chat`: the chat object; 131 | --- - `VectorCode.QueryResult[]`: a list of query results. 132 | ---@field enabled? boolean|(fun(chat: CodeCompanion.Chat, results: VectorCode.QueryResult[]):boolean) 133 | ---The adapter used for the summarisation task. When set to `nil`, the adapter from the current chat will be used. 134 | ---@field adapter? string|CodeCompanion.HTTPAdapter|fun():CodeCompanion.HTTPAdapter 135 | ---The system prompt sent to the summariser model. 136 | ---When set to a function, it'll receive the default system prompt as the only parameter, 137 | ---and should return the new (full) system prompt. This allows you to customise or rewrite the system prompt. 138 | ---@field system_prompt? string|(fun(original_prompt: string): string) 139 | ---When set to true, include the query messages so that the LLM may make task-related summarisations. 140 | ---This happens __after__ the `system_prompt` callback processing. 141 | ---@field query_augmented? boolean 142 | -------------------------------------------------------------------------------- /lua/vectorcode/init.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local vc_config = require("vectorcode.config") 4 | local utils = require("vectorcode.utils") 5 | local logger = vc_config.logger 6 | local get_config = vc_config.get_user_config 7 | local notify_opts = vc_config.notify_opts 8 | local jobrunner = require("vectorcode.jobrunner.cmd") 9 | local notify = vim.schedule_wrap(vim.notify) 10 | 11 | M.query = vc_config.check_cli_wrap( 12 | ---This function wraps the `query` subcommand of the VectorCode CLI. When used without the `callback` parameter, 13 | ---this function works as a synchronous function and returns the results. Otherwise, this function will run asynchronously 14 | ---and the results are accessible through the `callback` function (the results will be passed as the argument to the 15 | ---callback). 16 | ---@param query_message string|string[] Query message(s) to send to the `vectorcode query` command 17 | ---@param opts VectorCode.QueryOpts? A table of config options. If nil, the default config or `setup` config will be used. 18 | ---@param callback fun(result:VectorCode.QueryResult[])? Callback that receives the results when running asynchronously. 19 | ---@return VectorCode.QueryResult[]? An array of results. 20 | function(query_message, opts, callback) 21 | logger.info("vectorcode.query: ", query_message, opts, callback) 22 | opts = vim.tbl_deep_extend("force", vc_config.get_query_opts(), opts or {}) 23 | if opts.n_query == 0 then 24 | if opts.notify then 25 | vim.notify("n_query is 0. Not sending queries.") 26 | end 27 | return {} 28 | end 29 | 30 | ---@type integer?
31 | local timeout_ms = opts.timeout_ms 32 | if timeout_ms < 1 then 33 | timeout_ms = nil 34 | end 35 | if opts.notify then 36 | vim.notify( 37 | ("Started retrieving %s documents."):format(tostring(opts.n_query)), 38 | vim.log.levels.INFO, 39 | notify_opts 40 | ) 41 | end 42 | local bufnr = vim.api.nvim_get_current_buf() 43 | local args = { "query", "--pipe", "-n", tostring(opts.n_query) } 44 | if type(query_message) == "string" then 45 | query_message = { query_message } 46 | end 47 | vim.list_extend(args, query_message) 48 | 49 | if opts.exclude_this then 50 | vim.list_extend(args, { "--exclude", vim.api.nvim_buf_get_name(bufnr) }) 51 | end 52 | 53 | logger.debug("vectorcode.query cmd args: ", args) 54 | if callback == nil then 55 | local result, err = jobrunner.run(args, timeout_ms, bufnr) 56 | if err then 57 | logger.warn(vim.inspect(err)) 58 | end 59 | logger.debug(result) 60 | return result 61 | else 62 | jobrunner.run_async(args, function(result, error) 63 | logger.debug(result) 64 | callback(result or {}) 65 | if error then 66 | logger.warn(vim.inspect(error)) 67 | end 68 | end, bufnr) 69 | end 70 | end 71 | ) 72 | 73 | M.vectorise = vc_config.check_cli_wrap( 74 | ---This function wraps the `vectorise` subcommand. By default this function doesn't pass a `--project_root` flag. 75 | ---The command will be run from the current working directory, and the normal project root detection logic in the 76 | ---CLI will work as normal. You may also pass a `project_root` as the second argument, in which case the 77 | ---`--project_root` will be passed. 78 | ---@param files string|string[] Files to vectorise. 79 | ---@param project_root string? Add the `--project_root` flag and the passed argument to the command. 80 | function(files, project_root) 81 | logger.info("vectorcode.vectorise: ", files, project_root) 82 | local args = { "--pipe", "vectorise" } 83 | if 84 | project_root ~= nil 85 | or ( 86 | M.check("config", function(obj) 87 | if obj.code == 0 then 88 | project_root = obj.stdout 89 | end 90 | end) 91 | ) 92 | then 93 | vim.list_extend(args, { "--project_root", project_root }) 94 | end 95 | if type(files) == "string" then 96 | files = { files } 97 | end 98 | local valid_files = {} 99 | for k, v in pairs(files) do 100 | if vim.fn.filereadable(v) == 1 then 101 | vim.list_extend(valid_files, { files[k] }) 102 | end 103 | end 104 | if #valid_files > 0 then 105 | vim.list_extend(args, valid_files) 106 | else 107 | return 108 | end 109 | if get_config().notify then 110 | vim.schedule(function() 111 | vim.notify( 112 | ("Vectorising %s"):format(table.concat(files, ", ")), 113 | vim.log.levels.INFO, 114 | notify_opts 115 | ) 116 | end) 117 | end 118 | local bufnr = vim.api.nvim_get_current_buf() 119 | logger.debug("vectorcode.vectorise cmd args: ", args) 120 | jobrunner.run_async(args, function(result, error) 121 | if result then 122 | if vc_config.get_user_config().notify then 123 | vim.schedule_wrap(vim.notify)( 124 | "Indexing successful.", 125 | vim.log.levels.INFO, 126 | notify_opts 127 | ) 128 | end 129 | logger.info("Vectorise result:", vim.inspect(result)) 130 | elseif error then 131 | vim.schedule_wrap(vim.notify)( 132 | string.format("Indexing failed:\n%s", vim.inspect(error)), 133 | vim.log.levels.WARN, 134 | notify_opts 135 | ) 136 | logger.warn(vim.inspect(error)) 137 | else 138 | vim.schedule_wrap(vim.notify)( 139 | "Indexing failed.", 140 | vim.log.levels.WARN, 141 | notify_opts 142 | ) 143 | end 144 | end, bufnr) 145 | end 146 | ) 147 | 148 | ---@param project_root string? 
149 | M.update = vc_config.check_cli_wrap(function(project_root) 150 | logger.info("vectorcode.update: ", project_root) 151 | local args = { "update" } 152 | if project_root ~= nil and utils.is_directory(project_root) then 153 | vim.list_extend(args, { "--project_root", project_root }) 154 | end 155 | logger.debug("vectorcode.update cmd args: ", args) 156 | jobrunner.run_async(args, function(result, error) 157 | if result then 158 | if vc_config.get_user_config().notify then 159 | notify("Indexing successful.", vim.log.levels.INFO, notify_opts) 160 | end 161 | logger.info("Update result:", vim.inspect(result)) 162 | elseif error then 163 | notify( 164 | string.format("Update failed:\n%s", vim.inspect(error)), 165 | vim.log.levels.WARN, 166 | notify_opts 167 | ) 168 | logger.warn(vim.inspect(error)) 169 | else 170 | notify("Update failed.", vim.log.levels.WARN, notify_opts) 171 | end 172 | end, vim.api.nvim_get_current_buf()) 173 | 174 | if get_config().notify then 175 | notify("Updating VectorCode embeddings...", vim.log.levels.INFO, notify_opts) 176 | end 177 | end) 178 | 179 | ---@param check_item string? See `vectorcode check` documentation. 180 | ---@param stdout_cb fun(stdout: vim.SystemCompleted)? Gives user access to the exit code, stdout and signal. 181 | ---@return boolean 182 | function M.check(check_item, stdout_cb) 183 | if not vc_config.has_cli() then 184 | return false 185 | end 186 | check_item = check_item or "config" 187 | local return_code 188 | jobrunner.run_async({ "check", check_item }, function(result, error, code, signal) 189 | return_code = code 190 | if type(stdout_cb) == "function" then 191 | stdout_cb({ 192 | stdout = utils.flatten_table_to_string(result), 193 | stderr = utils.flatten_table_to_string(error, "Unknown error."), 194 | code = code, 195 | signal = signal, 196 | }) 197 | end 198 | end, 0) 199 | return return_code == 0 200 | end 201 | 202 | ---@alias prompt_type "ls"|"query"|"vectorise" 203 | ---@param item prompt_type|prompt_type[]|nil 204 | ---@return string[] 205 | M.prompts = vc_config.check_cli_wrap(function(item) 206 | local args = { "prompts", "-p" } 207 | if item then 208 | if type(item) == "string" then 209 | table.insert(args, item) 210 | else 211 | vim.list_extend(args, item) 212 | end 213 | end 214 | local result, error = jobrunner.run(args, -1, 0) 215 | if result == nil or vim.tbl_isempty(result) then 216 | logger.warn(vim.inspect(error)) 217 | if vc_config.get_user_config().notify then 218 | notify(vim.inspect(error)) 219 | end 220 | return {} 221 | end 222 | return vim.iter(result):flatten(math.huge):totable() 223 | end) 224 | 225 | M.setup = vc_config.setup 226 | return M 227 | -------------------------------------------------------------------------------- /tests/subcommands/query/test_reranker.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | from unittest.mock import MagicMock, patch 3 | 4 | import numpy 5 | import pytest 6 | 7 | from vectorcode.cli_utils import Config, QueryInclude 8 | from vectorcode.subcommands.query.reranker import ( 9 | CrossEncoderReranker, 10 | NaiveReranker, 11 | RerankerBase, 12 | __supported_rerankers, 13 | add_reranker, 14 | get_available_rerankers, 15 | get_reranker, 16 | ) 17 | from vectorcode.subcommands.query.types import QueryResult 18 | 19 | 20 | @pytest.fixture(scope="function") 21 | def config(): 22 | return Config( 23 | n_result=3, 24 | reranker_params={ 25 | "model_name_or_path": "cross-encoder/ms-marco-MiniLM-L-6-v2", 26 | 
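            # these parameters are passed through to sentence_transformers.CrossEncoder
            # when the reranker is constructed (see test_cross_encoder_reranker_initialization below)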
"device": "cpu", 27 | }, 28 | reranker="CrossEncoderReranker", 29 | query=["query chunk 1", "query chunk 2"], 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="function") 34 | def naive_reranker_conf(): 35 | return Config( 36 | n_result=3, reranker="NaiveReranker", query=["query chunk 1", "query chunk 2"] 37 | ) 38 | 39 | 40 | @pytest.fixture(scope="function") 41 | def query_result() -> list[QueryResult]: 42 | return [ 43 | QueryResult( 44 | path="file1.py", 45 | chunk=MagicMock(), 46 | query=("query chunk 1",), 47 | scores=(0.5,), 48 | ), 49 | QueryResult( 50 | path="file2.py", 51 | chunk=MagicMock(), 52 | query=("query chunk 1",), 53 | scores=(0.9,), 54 | ), 55 | QueryResult( 56 | path="file3.py", 57 | chunk=MagicMock(), 58 | query=("query chunk 1",), 59 | scores=(0.3,), 60 | ), 61 | QueryResult( 62 | path="file2.py", 63 | chunk=MagicMock(), 64 | query=("query chunk 2",), 65 | scores=(0.6,), 66 | ), 67 | QueryResult( 68 | path="file4.py", 69 | chunk=MagicMock(), 70 | query=("query chunk 2",), 71 | scores=(0.7,), 72 | ), 73 | QueryResult( 74 | path="file3.py", 75 | chunk=MagicMock(), 76 | query=("query chunk 2",), 77 | scores=(0.2,), 78 | ), 79 | ] 80 | 81 | 82 | @pytest.fixture(scope="function") 83 | def empty_query_result(): 84 | return [] 85 | 86 | 87 | @pytest.fixture(scope="function") 88 | def query_chunks(): 89 | return ["query chunk 1", "query chunk 2"] 90 | 91 | 92 | def test_reranker_base_method_is_abstract(config): 93 | with pytest.raises((NotImplementedError, TypeError)): 94 | RerankerBase(config) 95 | 96 | 97 | def test_naive_reranker_initialization(naive_reranker_conf): 98 | """Test initialization of NaiveReranker""" 99 | reranker = NaiveReranker(naive_reranker_conf) 100 | assert reranker.n_result == 3 101 | 102 | 103 | def test_reranker_create(naive_reranker_conf): 104 | reranker = NaiveReranker.create(naive_reranker_conf) 105 | assert isinstance(reranker, NaiveReranker) 106 | 107 | 108 | def test_reranker_create_fail(): 109 | class TestReranker(RerankerBase): 110 | def __init__(self, configs, **kwargs): 111 | raise Exception 112 | 113 | with pytest.raises(Exception): 114 | TestReranker.create(Config()) 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_naive_reranker_rerank(naive_reranker_conf, query_result): 119 | """Test basic reranking functionality of NaiveReranker""" 120 | reranker = NaiveReranker(naive_reranker_conf) 121 | result = await reranker.rerank(query_result) 122 | 123 | # Check the result is a list of paths with correct length 124 | assert isinstance(result, list) 125 | assert len(result) <= naive_reranker_conf.n_result 126 | 127 | # Check all returned items are strings (paths) 128 | for res in result: 129 | assert isinstance(res, str) 130 | 131 | 132 | @pytest.mark.asyncio 133 | async def test_naive_reranker_rerank_chunks(naive_reranker_conf, query_result): 134 | """Test basic reranking functionality of NaiveReranker""" 135 | naive_reranker_conf.include = [QueryInclude.chunk] 136 | reranker = NaiveReranker(naive_reranker_conf) 137 | chunks = {i.chunk for i in query_result} 138 | result = await reranker.rerank(query_result) 139 | 140 | # Check the result is a list of paths with correct length 141 | assert isinstance(result, list) 142 | assert len(result) <= naive_reranker_conf.n_result 143 | 144 | for res in result: 145 | assert res in chunks 146 | 147 | 148 | @pytest.mark.asyncio 149 | async def test_naive_reranker_rerank_empty_result( 150 | naive_reranker_conf, empty_query_result 151 | ): 152 | reranker = NaiveReranker(naive_reranker_conf) 153 | result 
= await reranker.rerank(empty_query_result) 154 | assert len(result) == 0 155 | 156 | 157 | @patch("sentence_transformers.CrossEncoder") 158 | def test_cross_encoder_reranker_initialization(mock_cross_encoder: MagicMock, config): 159 | model_name = config.reranker_params["model_name_or_path"] 160 | reranker = CrossEncoderReranker(config) 161 | # Verify constructor was called with correct parameters 162 | mock_cross_encoder.assert_called_once_with(model_name, **config.reranker_params) 163 | assert reranker.n_result == config.n_result 164 | 165 | 166 | @patch("sentence_transformers.CrossEncoder") 167 | def test_cross_encoder_reranker_initialization_fallback_model_name( 168 | mock_cross_encoder: MagicMock, config 169 | ): 170 | config.reranker_params = {} 171 | reranker = CrossEncoderReranker(config) 172 | 173 | # Verify constructor was called with correct parameters 174 | mock_cross_encoder.assert_called_once_with("cross-encoder/ms-marco-MiniLM-L-6-v2") 175 | assert reranker.n_result == config.n_result 176 | 177 | 178 | @pytest.mark.asyncio 179 | @patch("sentence_transformers.CrossEncoder") 180 | async def test_cross_encoder_reranker_rerank(mock_cross_encoder, config, query_result): 181 | mock_model = MagicMock() 182 | mock_cross_encoder.return_value = mock_model 183 | 184 | mock_model.predict = lambda x: numpy.random.random((len(x),)) 185 | 186 | reranker = CrossEncoderReranker(config) 187 | result = await reranker.rerank(query_result) 188 | 189 | # Result assertions 190 | assert isinstance(result, list) 191 | assert all(isinstance(path, str) for path in result) 192 | assert len(result) <= config.n_result 193 | 194 | 195 | @pytest.mark.asyncio 196 | async def test_naive_reranker_document_selection_logic( 197 | naive_reranker_conf, query_result 198 | ): 199 | """Test that NaiveReranker correctly selects documents based on distances""" 200 | # Create a query result with known distances 201 | 202 | reranker = NaiveReranker(naive_reranker_conf) 203 | result = await reranker.rerank(query_result) 204 | 205 | # Check that files are included (exact order depends on implementation details) 206 | assert len(result) > 0 207 | # Common files should be present 208 | assert "file2.py" in result or "file3.py" in result 209 | 210 | 211 | def test_get_reranker(config, naive_reranker_conf): 212 | assert get_reranker(naive_reranker_conf).configs.reranker == "NaiveReranker" 213 | 214 | reranker = get_reranker(config) 215 | assert reranker.configs.reranker == "CrossEncoderReranker" 216 | 217 | reranker = cast(CrossEncoderReranker, get_reranker(config)) 218 | assert reranker.configs.reranker == "CrossEncoderReranker", ( 219 | "configs.reranker should fallback to 'CrossEncoderReranker'" 220 | ) 221 | 222 | 223 | def test_supported_rerankers_initialization(config, naive_reranker_conf): 224 | """Test that __supported_rerankers contains the expected default rerankers""" 225 | 226 | assert isinstance(get_reranker(config), CrossEncoderReranker) 227 | assert isinstance(get_reranker(naive_reranker_conf), NaiveReranker) 228 | assert len(get_available_rerankers()) == 2 229 | 230 | 231 | def test_add_reranker_success(): 232 | """Test successful registration of a new reranker""" 233 | 234 | original_count = len(get_available_rerankers()) 235 | 236 | @add_reranker 237 | class TestReranker(RerankerBase): 238 | async def compute_similarity(self, results, query_message): 239 | return [] 240 | 241 | assert len(get_available_rerankers()) == original_count + 1 242 | assert "TestReranker" in __supported_rerankers 243 | assert 
isinstance( 244 | get_reranker(Config(reranker="TestReranker", query=["hello world"])), 245 | TestReranker, 246 | ) 247 | __supported_rerankers.pop("TestReranker") 248 | 249 | 250 | def test_add_reranker_duplicate(): 251 | """Test duplicate reranker registration raises error""" 252 | 253 | # First registration should succeed 254 | @add_reranker 255 | class TestReranker(RerankerBase): 256 | async def compute_similarity(self, results, query_message): 257 | return [] 258 | 259 | # Second registration should fail 260 | with pytest.raises(AttributeError): 261 | add_reranker(TestReranker) 262 | __supported_rerankers.pop("TestReranker") 263 | 264 | 265 | def test_add_reranker_invalid_baseclass(): 266 | """Test that non-RerankerBase classes can't be registered""" 267 | 268 | with pytest.raises(TypeError): 269 | 270 | @add_reranker 271 | class InvalidReranker: 272 | pass 273 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, cast 5 | 6 | from chromadb import Where 7 | from chromadb.api.models.AsyncCollection import AsyncCollection 8 | from chromadb.api.types import IncludeEnum, QueryResult 9 | from chromadb.errors import InvalidCollectionException, InvalidDimensionException 10 | from tree_sitter import Point 11 | 12 | from vectorcode.chunking import Chunk, StringChunker 13 | from vectorcode.cli_utils import ( 14 | Config, 15 | QueryInclude, 16 | cleanup_path, 17 | expand_globs, 18 | expand_path, 19 | ) 20 | from vectorcode.common import ( 21 | ClientManager, 22 | get_collection, 23 | get_embedding_function, 24 | verify_ef, 25 | ) 26 | from vectorcode.subcommands.query import types as vectorcode_types 27 | from vectorcode.subcommands.query.reranker import ( 28 | RerankerError, 29 | get_reranker, 30 | ) 31 | 32 | logger = logging.getLogger(name=__name__) 33 | 34 | 35 | def convert_query_results( 36 | chroma_result: QueryResult, queries: list[str] 37 | ) -> list[vectorcode_types.QueryResult]: 38 | """Convert chromadb query result to in-house query results""" 39 | assert chroma_result["documents"] is not None 40 | assert chroma_result["distances"] is not None 41 | assert chroma_result["metadatas"] is not None 42 | assert chroma_result["ids"] is not None 43 | 44 | chroma_results_list: list[vectorcode_types.QueryResult] = [] 45 | for q_i in range(len(queries)): 46 | q = queries[q_i] 47 | documents = chroma_result["documents"][q_i] 48 | distances = chroma_result["distances"][q_i] 49 | metadatas = chroma_result["metadatas"][q_i] 50 | ids = chroma_result["ids"][q_i] 51 | for doc, dist, meta, _id in zip(documents, distances, metadatas, ids): 52 | chunk = Chunk(text=doc, id=_id) 53 | if meta.get("start"): 54 | chunk.start = Point(int(meta.get("start", 0)), 0) 55 | if meta.get("end"): 56 | chunk.end = Point(int(meta.get("end", 0)), 0) 57 | if meta.get("path"): 58 | chunk.path = str(meta["path"]) 59 | chroma_results_list.append( 60 | vectorcode_types.QueryResult( 61 | chunk=chunk, 62 | path=str(meta.get("path", "")), 63 | query=(q,), 64 | scores=(-dist,), 65 | ) 66 | ) 67 | return chroma_results_list 68 | 69 | 70 | async def get_query_result_files( 71 | collection: AsyncCollection, configs: Config 72 | ) -> list[str | Chunk]: 73 | query_chunks = [] 74 | assert configs.query, "Query messages cannot be empty." 
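    # Each query message is split into chunks with StringChunker; every chunk is
    # embedded and queried against the collection, and the combined hits are
    # re-ranked by the configured reranker before this function returns.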
75 | chunker = StringChunker(configs) 76 | for q in configs.query: 77 | query_chunks.extend(str(i) for i in chunker.chunk(q)) 78 | 79 | configs.query_exclude = [ 80 | expand_path(i, True) 81 | for i in await expand_globs(configs.query_exclude) 82 | if os.path.isfile(i) 83 | ] 84 | if (await collection.count()) == 0: 85 | logger.error("Empty collection!") 86 | return [] 87 | try: 88 | if len(configs.query_exclude): 89 | logger.info(f"Excluding {len(configs.query_exclude)} files from the query.") 90 | filter: dict[str, Any] = {"path": {"$nin": configs.query_exclude}} 91 | else: 92 | filter = {} 93 | num_query = configs.n_result 94 | if QueryInclude.chunk in configs.include: 95 | if filter: 96 | filter = {"$and": [filter.copy(), {"start": {"$gte": 0}}]} 97 | else: 98 | filter["start"] = {"$gte": 0} 99 | else: 100 | num_query = await collection.count() 101 | if configs.query_multiplier > 0: 102 | num_query = min( 103 | int(configs.n_result * configs.query_multiplier), 104 | await collection.count(), 105 | ) 106 | logger.info(f"Querying {num_query} chunks for reranking.") 107 | query_embeddings = get_embedding_function(configs)(query_chunks) 108 | if isinstance(configs.embedding_dims, int) and configs.embedding_dims > 0: 109 | query_embeddings = [e[: configs.embedding_dims] for e in query_embeddings] 110 | chroma_query_results: QueryResult = await collection.query( 111 | query_embeddings=query_embeddings, 112 | n_results=num_query, 113 | include=[ 114 | IncludeEnum.metadatas, 115 | IncludeEnum.distances, 116 | IncludeEnum.documents, 117 | ], 118 | where=cast(Where, filter) or None, 119 | ) 120 | except IndexError: 121 | # no results found 122 | return [] 123 | 124 | reranker = get_reranker(configs) 125 | converted_results = convert_query_results(chroma_query_results, configs.query) 126 | return await reranker.rerank(converted_results) 127 | 128 | 129 | async def build_query_results( 130 | collection: AsyncCollection, configs: Config 131 | ) -> list[dict[str, str | int]]: 132 | assert configs.project_root 133 | 134 | def make_output_path(path: str, absolute: bool) -> str: 135 | if absolute: 136 | if os.path.isabs(path): 137 | return path 138 | return os.path.abspath(os.path.join(str(configs.project_root), path)) 139 | else: 140 | rel_path = os.path.relpath(path, configs.project_root) 141 | if isinstance(rel_path, bytes): # pragma: nocover 142 | # for some reasons, some python versions report that `os.path.relpath` returns a string. 143 | rel_path = rel_path.decode() 144 | return rel_path 145 | 146 | structured_result = [] 147 | for res in await get_query_result_files(collection, configs): 148 | if isinstance(res, str): 149 | output_path = make_output_path(res, configs.use_absolute_path) 150 | io_path = make_output_path(res, True) 151 | if not os.path.isfile(io_path): 152 | logger.warning(f"{io_path} is no longer a valid file.") 153 | continue 154 | with open(io_path) as fin: 155 | structured_result.append({"path": output_path, "document": fin.read()}) 156 | else: 157 | res = cast(Chunk, res) 158 | assert res.path, f"{res} has no `path` attribute." 
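            # chunk mode: emit the chunk text together with its path, start/end
            # line numbers and chunk id instead of the full document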
159 | structured_result.append( 160 | { 161 | "path": make_output_path(res.path, configs.use_absolute_path) 162 | if res.path is not None 163 | else None, 164 | "chunk": res.text, 165 | "start_line": res.start.row if res.start is not None else None, 166 | "end_line": res.end.row if res.end is not None else None, 167 | "chunk_id": res.id, 168 | } 169 | ) 170 | for result in structured_result: 171 | if result.get("path") is not None: 172 | result["path"] = cleanup_path(result["path"]) 173 | return structured_result 174 | 175 | 176 | async def query(configs: Config) -> int: 177 | if ( 178 | QueryInclude.chunk in configs.include 179 | and QueryInclude.document in configs.include 180 | ): 181 | logger.error( 182 | "Having both chunk and document in the output is not supported!", 183 | ) 184 | return 1 185 | async with ClientManager().get_client(configs) as client: 186 | try: 187 | collection = await get_collection(client, configs, False) 188 | if not verify_ef(collection, configs): 189 | return 1 190 | except (ValueError, InvalidCollectionException) as e: 191 | logger.error( 192 | f"{e.__class__.__name__}: There's no existing collection for {configs.project_root}", 193 | ) 194 | return 1 195 | except InvalidDimensionException as e: 196 | logger.error( 197 | f"{e.__class__.__name__}: The collection was embedded with a different embedding model.", 198 | ) 199 | return 1 200 | except IndexError as e: # pragma: nocover 201 | logger.error( 202 | f"{e.__class__.__name__}: Failed to get the collection. Please check your config." 203 | ) 204 | return 1 205 | 206 | if not configs.pipe: 207 | print("Starting querying...") 208 | 209 | if QueryInclude.chunk in configs.include: 210 | if len((await collection.get(where={"start": {"$gte": 0}}))["ids"]) == 0: 211 | logger.warning( 212 | """ 213 | This collection doesn't contain line range metadata. Falling back to `--include path document`. 214 | Please re-vectorise it to use `--include chunk`.""", 215 | ) 216 | configs.include = [QueryInclude.path, QueryInclude.document] 217 | 218 | try: 219 | structured_result = await build_query_results(collection, configs) 220 | except RerankerError as e: # pragma: nocover 221 | # error logs should be handled where they're raised 222 | logger.error(f"{e.__class__.__name__}") 223 | return 1 224 | 225 | if configs.pipe: 226 | print(json.dumps(structured_result)) 227 | else: 228 | for idx, result in enumerate(structured_result): 229 | for include_item in configs.include: 230 | print(f"{include_item.to_header()}{result.get(include_item.value)}") 231 | if idx != len(structured_result) - 1: 232 | print() 233 | return 0 234 | -------------------------------------------------------------------------------- /lua/vectorcode/cacher/default.lua: -------------------------------------------------------------------------------- 1 | ---@type VectorCode.CacheBackend 2 | local M = {} 3 | 4 | local utils = require("vectorcode.utils") 5 | local vc_config = require("vectorcode.config") 6 | local notify_opts = vc_config.notify_opts 7 | local jobrunner = require("vectorcode.jobrunner.cmd") 8 | 9 | local logger = vc_config.logger 10 | 11 | ---@type table 12 | local CACHE = {} 13 | 14 | ---@param bufnr integer 15 | local function kill_jobs(bufnr) 16 | ---@type VectorCode.Cache? 
17 | local cache = CACHE[bufnr] 18 | if cache ~= nil then 19 | for job_pid, is_running in pairs(cache.jobs) do 20 | if type(is_running) == "number" then 21 | vim.uv.kill(job_pid, 15) 22 | end 23 | end 24 | end 25 | end 26 | 27 | ---@param query_message string|string[] 28 | ---@param buf_nr integer 29 | local function async_runner(query_message, buf_nr) 30 | if CACHE[buf_nr] == nil or not CACHE[buf_nr].enabled then 31 | return 32 | end 33 | local buf_name 34 | vim.schedule(function() 35 | buf_name = vim.api.nvim_buf_get_name(buf_nr) 36 | logger.debug("Started default cacher job on :", buf_name) 37 | end) 38 | ---@type VectorCode.Cache 39 | local cache = CACHE[buf_nr] 40 | local args = { 41 | "query", 42 | "--pipe", 43 | "-n", 44 | tostring(cache.options.n_query), 45 | } 46 | 47 | if type(query_message) == "string" then 48 | query_message = { query_message } 49 | end 50 | vim.list_extend(args, query_message) 51 | 52 | if cache.options.exclude_this then 53 | vim.list_extend(args, { "--exclude", vim.api.nvim_buf_get_name(buf_nr) }) 54 | end 55 | 56 | local project_root = cache.options.project_root 57 | if project_root ~= nil then 58 | assert( 59 | utils.is_directory(project_root), 60 | ("%s is not a valid directory!"):format(project_root) 61 | ) 62 | vim.list_extend(args, { "--project_root", project_root }) 63 | end 64 | 65 | if cache.options.single_job then 66 | kill_jobs(buf_nr) 67 | end 68 | 69 | CACHE[buf_nr].job_count = CACHE[buf_nr].job_count + 1 70 | logger.debug("vectorcode default cacher job args: ", args) 71 | 72 | -- jobrunner is assumed to be defined at the module level, e.g., local jobrunner = require("vectorcode.jobrunner.cmd") 73 | local job_pid 74 | job_pid = jobrunner.run_async( 75 | args, 76 | function(json_result, stderr_error, exit_code, signal) 77 | if not M.buf_is_registered(buf_nr) then 78 | return 79 | end 80 | logger.debug("vectorcode ", buf_name, " default cacher results: ", json_result) 81 | CACHE[buf_nr].job_count = CACHE[buf_nr].job_count - 1 82 | assert(job_pid ~= nil, "Failed to fetch the job pid.") 83 | CACHE[buf_nr].jobs[job_pid] = nil 84 | 85 | if exit_code ~= 0 then 86 | vim.schedule(function() 87 | if CACHE[buf_nr].options.notify then 88 | if signal == 15 then 89 | vim.notify("Retrieval aborted.", vim.log.levels.INFO, notify_opts) 90 | else 91 | vim.notify( 92 | "Retrieval failed:\\n" .. table.concat(stderr_error, "\n"), 93 | vim.log.levels.WARN, 94 | notify_opts 95 | ) 96 | end 97 | end 98 | end) 99 | return 100 | end 101 | cache = CACHE[buf_nr] 102 | cache.retrieval = json_result or {} 103 | vim.schedule(function() 104 | if cache.options.notify then 105 | vim.notify( 106 | ("Caching for buffer %d has completed."):format(buf_nr), 107 | vim.log.levels.INFO, 108 | notify_opts 109 | ) 110 | end 111 | end) 112 | end, 113 | buf_nr 114 | ) 115 | 116 | ---@type VectorCode.Cache 117 | cache = CACHE[buf_nr] 118 | if job_pid then 119 | cache.last_run = vim.uv.clock_gettime("realtime").sec 120 | cache.jobs[job_pid] = vim.uv.clock_gettime("realtime").sec 121 | end 122 | vim.schedule(function() 123 | if cache.options.notify then 124 | vim.notify( 125 | ("Caching for buffer %d has started."):format(buf_nr), 126 | vim.log.levels.INFO, 127 | notify_opts 128 | ) 129 | end 130 | end) 131 | end 132 | 133 | M.register_buffer = vc_config.check_cli_wrap( 134 | ---This function registers a buffer to be cached by VectorCode. The 135 | ---registered buffer can be acquired by the `query_from_cache` API. 
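  ---
  ---A minimal usage sketch (assuming the project has already been vectorised;
  ---the buffer number and option values below are illustrative, not defaults):
  ---```lua
  ---local cacher = require("vectorcode.cacher.default")
  ---cacher.register_buffer(0, { n_query = 5, run_on_register = true })
  ----- later, read whatever the background jobs have cached:
  ---local results = cacher.query_from_cache(0, { notify = false })
  ---```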
136 | ---The retrieval of the files occurs in the background, so this 137 | ---function will not block the main thread. 138 | --- 139 | ---NOTE: this function uses an autocommand to track the changes to the buffer and trigger retrieval. 140 | ---@param bufnr integer? Defaults to the current buffer. 141 | ---@param opts VectorCode.RegisterOpts? Async options. 142 | function(bufnr, opts) 143 | if bufnr == 0 or bufnr == nil then 144 | bufnr = vim.api.nvim_get_current_buf() 145 | end 146 | logger.info( 147 | ("Registering buffer %s %s for default cacher."):format( 148 | bufnr, 149 | vim.api.nvim_buf_get_name(bufnr) 150 | ) 151 | ) 152 | if M.buf_is_registered(bufnr) then 153 | opts = vim.tbl_deep_extend("force", CACHE[bufnr].options, opts or {}) 154 | end 155 | opts = 156 | vim.tbl_deep_extend("force", vc_config.get_user_config().async_opts, opts or {}) 157 | 158 | if M.buf_is_registered(bufnr) then 159 | -- update the options and/or query_cb 160 | CACHE[bufnr].options = 161 | vim.tbl_deep_extend("force", CACHE[bufnr].options, opts or {}) 162 | logger.debug( 163 | ("Updated `default` cacher opts for buffer %s:\n%s"):format( 164 | bufnr, 165 | vim.inspect(opts) 166 | ) 167 | ) 168 | else 169 | CACHE[bufnr] = { 170 | enabled = true, 171 | retrieval = nil, 172 | options = opts, 173 | jobs = {}, 174 | job_count = 0, 175 | } 176 | end 177 | if opts.run_on_register then 178 | async_runner(opts.query_cb(bufnr), bufnr) 179 | end 180 | local group = vim.api.nvim_create_augroup( 181 | ("VectorCodeCacheGroup%d"):format(bufnr), 182 | { clear = true } 183 | ) 184 | vim.api.nvim_create_autocmd(opts.events, { 185 | group = group, 186 | callback = function() 187 | assert(CACHE[bufnr] ~= nil, "buffer vectorcode cache not registered") 188 | local cache = CACHE[bufnr] 189 | if 190 | cache.last_run == nil 191 | or (vim.uv.clock_gettime("realtime").sec - cache.last_run) > opts.debounce 192 | then 193 | local cb = cache.options.query_cb 194 | assert(type(cb) == "function", "`cb` should be a function.") 195 | async_runner(cb(bufnr), bufnr) 196 | end 197 | end, 198 | buffer = bufnr, 199 | desc = "Run query on certain autocmd", 200 | }) 201 | vim.api.nvim_create_autocmd("BufWinLeave", { 202 | buffer = bufnr, 203 | desc = "Kill all running VectorCode async jobs.", 204 | group = group, 205 | callback = function() 206 | kill_jobs(bufnr) 207 | end, 208 | }) 209 | end 210 | ) 211 | 212 | M.deregister_buffer = vc_config.check_cli_wrap( 213 | ---This function deregisters a buffer from VectorCode. This will kill all 214 | ---running jobs, delete cached results, and deregister the autocommands 215 | ---associated with the buffer. If the caching has not been registered, an 216 | ---error notification will be fired. 217 | ---@param bufnr integer?
218 | ---@param opts {notify:boolean} 219 | function(bufnr, opts) 220 | opts = opts or { notify = false } 221 | if bufnr == nil or bufnr == 0 then 222 | bufnr = vim.api.nvim_get_current_buf() 223 | end 224 | logger.info( 225 | ("Deregistering buffer %s %s"):format(bufnr, vim.api.nvim_buf_get_name(bufnr)) 226 | ) 227 | if M.buf_is_registered(bufnr) then 228 | kill_jobs(bufnr) 229 | vim.api.nvim_del_augroup_by_name(("VectorCodeCacheGroup%d"):format(bufnr)) 230 | CACHE[bufnr] = nil 231 | if opts.notify then 232 | vim.notify( 233 | ("VectorCode Caching has been unregistered for buffer %d."):format(bufnr), 234 | vim.log.levels.INFO, 235 | notify_opts 236 | ) 237 | end 238 | else 239 | vim.notify( 240 | ("VectorCode Caching hasn't been registered for buffer %d."):format(bufnr), 241 | vim.log.levels.ERROR, 242 | notify_opts 243 | ) 244 | end 245 | end 246 | ) 247 | 248 | ---@param bufnr integer? 249 | ---@return boolean 250 | M.buf_is_registered = function(bufnr) 251 | if bufnr == 0 or bufnr == nil then 252 | bufnr = vim.api.nvim_get_current_buf() 253 | end 254 | return type(CACHE[bufnr]) == "table" and not vim.tbl_isempty(CACHE[bufnr]) 255 | end 256 | 257 | M.query_from_cache = vc_config.check_cli_wrap( 258 | ---This function queries VectorCode from cache. Returns an array of results. Each item 259 | ---of the array is in the format of `{path="path/to/your/code.lua", document="document content"}`. 260 | ---@param bufnr integer? 261 | ---@param opts {notify: boolean}? 262 | ---@return VectorCode.QueryResult[] 263 | function(bufnr, opts) 264 | local result = {} 265 | if bufnr == 0 or bufnr == nil then 266 | bufnr = vim.api.nvim_get_current_buf() 267 | end 268 | if M.buf_is_registered(bufnr) then 269 | opts = vim.tbl_deep_extend( 270 | "force", 271 | { notify = CACHE[bufnr].options.notify }, 272 | opts or {} 273 | ) 274 | result = CACHE[bufnr].retrieval or {} 275 | local message = ("Retrieved %d documents from cache."):format(#result) 276 | logger.trace(("vectorcode cmd cacher for buf %s: %s"):format(bufnr, message)) 277 | if opts.notify then 278 | vim.schedule(function() 279 | vim.notify(message, vim.log.levels.INFO, notify_opts) 280 | end) 281 | end 282 | end 283 | return result 284 | end 285 | ) 286 | 287 | ---@alias ComponentCallback fun(result:VectorCode.QueryResult):string 288 | 289 | ---Compile the retrieval results into a string. 290 | ---@param bufnr integer 291 | ---@param component_cb ComponentCallback? The component callback that formats a retrieval result. 292 | ---@return {content:string, count:integer} 293 | function M.make_prompt_component(bufnr, component_cb) 294 | if bufnr == 0 or bufnr == nil then 295 | bufnr = vim.api.nvim_get_current_buf() 296 | end 297 | if not M.buf_is_registered(bufnr) then 298 | return { content = "", count = 0 } 299 | end 300 | if component_cb == nil then 301 | ---@type fun(result:VectorCode.QueryResult):string 302 | component_cb = function(result) 303 | return "<|file_sep|>" .. result.path .. "\n" .. result.document 304 | end 305 | end 306 | local final_component = "" 307 | local retrieval = M.query_from_cache(bufnr) 308 | for _, file in pairs(retrieval) do 309 | final_component = final_component .. component_cb(file) 310 | end 311 | return { content = final_component, count = #retrieval } 312 | end 313 | 314 | ---Checks if VectorCode has been configured properly for your project. 315 | ---See the CLI manual for details. 316 | ---@param check_item string? 317 | ---@param on_success fun(out: vim.SystemCompleted)? 
318 | ---@param on_failure fun(out: vim.SystemCompleted?)? 319 | function M.async_check(check_item, on_success, on_failure) 320 | vim.deprecate( 321 | "vectorcode.cacher.default.async_check", 322 | 'require("vectorcode.cacher").utils.async_check', 323 | "0.7.0", 324 | "VectorCode", 325 | true 326 | ) 327 | require("vectorcode.cacher").utils.async_check(check_item, on_success, on_failure) 328 | end 329 | 330 | ---@param bufnr integer? 331 | ---@return integer 332 | function M.buf_job_count(bufnr) 333 | if bufnr == nil or bufnr == 0 then 334 | bufnr = vim.api.nvim_get_current_buf() 335 | end 336 | if M.buf_is_registered(bufnr) then 337 | return CACHE[bufnr].job_count 338 | else 339 | return 0 340 | end 341 | end 342 | 343 | ---@param bufnr integer? 344 | ---@return boolean 345 | function M.buf_is_enabled(bufnr) 346 | if bufnr == nil or bufnr == 0 then 347 | bufnr = vim.api.nvim_get_current_buf() 348 | end 349 | return CACHE[bufnr] ~= nil and CACHE[bufnr].enabled 350 | end 351 | 352 | return M 353 | --------------------------------------------------------------------------------