├── tests ├── __init__.py ├── conftest.py └── subcommands │ ├── files │ ├── test_files.py │ ├── test_files_ls.py │ └── test_files_rm.py │ ├── test_prompts.py │ ├── test_check.py │ ├── test_drop.py │ ├── query │ ├── test_types.py │ └── test_reranker.py │ ├── test_clean.py │ ├── test_chunks.py │ ├── test_update.py │ └── test_ls.py ├── .vectorcode ├── vectorcode.exclude └── vectorcode.include ├── neovim.toml ├── images ├── sudoku_no_rag.png ├── sudoku_with_rag.png └── codecompanion_chat.png ├── selene.toml ├── stylua.toml ├── src └── vectorcode │ ├── __init__.py │ ├── subcommands │ ├── chunks.py │ ├── check.py │ ├── files │ │ ├── __init__.py │ │ ├── ls.py │ │ └── rm.py │ ├── __init__.py │ ├── query │ │ ├── reranker │ │ │ ├── naive.py │ │ │ ├── cross_encoder.py │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── types.py │ │ └── __init__.py │ ├── drop.py │ ├── clean.py │ ├── ls.py │ ├── prompt.py │ ├── update.py │ └── init.py │ ├── debugging.py │ └── main.py ├── lua ├── vectorcode │ ├── integrations │ │ ├── init.lua │ │ ├── codecompanion │ │ │ ├── prompts │ │ │ │ ├── presets.lua │ │ │ │ └── init.lua │ │ │ ├── init.lua │ │ │ ├── common.lua │ │ │ ├── ls_tool.lua │ │ │ ├── files_ls_tool.lua │ │ │ ├── files_rm_tool.lua │ │ │ └── vectorise_tool.lua │ │ ├── heirline.lua │ │ ├── lualine.lua │ │ └── copilotchat.lua │ ├── jobrunner │ │ ├── cmd.lua │ │ ├── init.lua │ │ └── lsp.lua │ ├── cacher │ │ ├── init.lua │ │ └── default.lua │ ├── utils.lua │ ├── config.lua │ ├── types.lua │ └── init.lua └── codecompanion │ └── _extensions │ └── vectorcode │ └── init.lua ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── enhancement.md │ └── bug_report.md ├── workflows │ ├── selene.yml │ ├── formatting.yaml │ ├── release.yml │ ├── test_and_cov.yml │ └── panvimdoc.yml └── FUNDING.yml ├── .pre-commit-config.yaml ├── Makefile ├── LICENSE ├── pyproject.toml ├── plugin └── vectorcode.lua ├── docs └── CONTRIBUTING.md ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vectorcode/vectorcode.exclude: -------------------------------------------------------------------------------- 1 | .vectorcode/ 2 | .github/ 3 | -------------------------------------------------------------------------------- /neovim.toml: -------------------------------------------------------------------------------- 1 | [selene] 2 | base = "lua51" 3 | name = "neovim" 4 | 5 | [vim] 6 | any = true 7 | -------------------------------------------------------------------------------- /images/sudoku_no_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/sudoku_no_rag.png -------------------------------------------------------------------------------- /images/sudoku_with_rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/sudoku_with_rag.png -------------------------------------------------------------------------------- /images/codecompanion_chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Davidyz/VectorCode/HEAD/images/codecompanion_chat.png -------------------------------------------------------------------------------- /.vectorcode/vectorcode.include: 
-------------------------------------------------------------------------------- 1 | lua/vectorcode/**/*.lua 2 | src/vectorcode/**/*.py 3 | plugin/vectorcode.lua 4 | docs/*.md 5 | tests/**/*.py 6 | -------------------------------------------------------------------------------- /selene.toml: -------------------------------------------------------------------------------- 1 | std = "neovim" 2 | exclude = ['lua/vectorcode/integrations/codecompanion/legacy_tool.lua'] 3 | 4 | [rules] 5 | mixed_table = "allow" 6 | -------------------------------------------------------------------------------- /stylua.toml: -------------------------------------------------------------------------------- 1 | indent_type = "Spaces" 2 | indent_width = 2 3 | column_width = 88 4 | quote_style = "AutoPreferDouble" 5 | no_call_parentheses = false 6 | -------------------------------------------------------------------------------- /src/vectorcode/__init__.py: -------------------------------------------------------------------------------- 1 | try: # pragma: no cover 2 | # this will be populated by pdm build backend when building. 3 | from vectorcode._version import __version__ 4 | except Exception: 5 | __version__ = "0.0.0" 6 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/init.lua: -------------------------------------------------------------------------------- 1 | return { 2 | codecompanion = require("vectorcode.integrations.codecompanion"), 3 | copilotchat = require("vectorcode.integrations.copilotchat"), 4 | lualine = require("vectorcode.integrations.lualine"), 5 | heirline = require("vectorcode.integrations.heirline"), 6 | } 7 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/prompts/presets.lua: -------------------------------------------------------------------------------- 1 | ---@type table 2 | local M = {} 3 | 4 | M["Neovim Tutor"] = { 5 | project_root = vim.fs.normalize(vim.env.VIMRUNTIME), 6 | file_patterns = { "lua/**/*.lua", "doc/**/*.txt" }, 7 | } 8 | 9 | return M 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from vectorcode.cli_utils import GLOBAL_CONFIG_DIR 4 | 5 | 6 | @pytest.fixture(autouse=True) 7 | def restore_global_config_path(): 8 | global GLOBAL_CONFIG_DIR 9 | original_global_config_path = GLOBAL_CONFIG_DIR 10 | yield 11 | GLOBAL_CONFIG_DIR = original_global_config_path 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Ask a question 4 | url: https://github.com/Davidyz/VectorCode/discussions/new?category=q-a 5 | about: If you're new to VectorCode and is having trouble setting it up, post in discussions first. We can convert it to an issue if something's indeed wrong with VectorCode. 
6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.9.1 4 | hooks: 5 | - id: ruff 6 | language: python 7 | - id: ruff 8 | language: python 9 | args: [ "check", "--fix", "--select", "I" ] 10 | - id: ruff-format 11 | language: python 12 | - repo: https://github.com/JohnnyMorganz/StyLua 13 | rev: v2.0.2 14 | hooks: 15 | - id: stylua-github 16 | language: lua 17 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/chunks.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from vectorcode.chunking import TreeSitterChunker 4 | from vectorcode.cli_utils import Config 5 | 6 | 7 | async def chunks(configs: Config) -> int: 8 | chunker = TreeSitterChunker(configs) 9 | result = [] 10 | for file_path in configs.files: 11 | result.append(list(i.export_dict() for i in chunker.chunk(str(file_path)))) 12 | print(json.dumps((result))) 13 | return 0 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement 3 | about: Discuss what new features can be added, or existing features improved. 4 | title: "[FEAT]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature you want** 11 | 12 | > Tell me about the feature, ideally with a scenario where this feature would be 13 | useful. 14 | 15 | **Complimentary Material** 16 | 17 | > If there's any material that may help me implement/test this feature, please 18 | list them here. 19 | -------------------------------------------------------------------------------- /.github/workflows/selene.yml: -------------------------------------------------------------------------------- 1 | name: Selene check 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | paths: 8 | - "lua/**/*.lua" 9 | - "plugin/*.lua" 10 | pull_request: 11 | 12 | jobs: 13 | selene: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Run Selene check 18 | uses: NTBBloodbath/selene-action@v1.0.0 19 | with: 20 | token: ${{ secrets.GITHUB_TOKEN }} 21 | args: lua/ 22 | version: 0.28.0 23 | -------------------------------------------------------------------------------- /.github/workflows/formatting.yaml: -------------------------------------------------------------------------------- 1 | name: Style check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | style-check: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - name: Ruff 18 | uses: astral-sh/ruff-action@v3 19 | 20 | - uses: JohnnyMorganz/stylua-action@v4 21 | with: 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | version: latest 24 | # CLI arguments 25 | args: --check . 
26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: multitest 2 | 3 | DEFAULT_GROUPS=--group dev --group lsp --group mcp --group debug 4 | 5 | deps: 6 | pdm lock $(DEFAULT_GROUPS) || pdm lock $(DEFAULT_GROUPS) --group legacy; \ 7 | pdm install 8 | 9 | test: 10 | make deps; \ 11 | pdm run pytest --enable-coredumpy --coredumpy-dir dumps 12 | 13 | multitest: 14 | @for i in {11..13}; do \ 15 | pdm use python3.$$i; \ 16 | make test; \ 17 | done 18 | 19 | coverage: 20 | make deps; \ 21 | pdm run coverage run -m pytest; \ 22 | pdm run coverage html; \ 23 | pdm run coverage report -m 24 | 25 | lint: 26 | pdm run ruff check src/**/*.py; \ 27 | pdm run basedpyright src/**/*.py; \ 28 | selene lua/**/*.lua plugin/*.lua 29 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | from vectorcode.cli_utils import CHECK_OPTIONS, Config, find_project_config_dir 5 | 6 | 7 | async def check(configs: Config) -> int: 8 | assert isinstance(configs.check_item, str) 9 | assert configs.check_item.lower() in CHECK_OPTIONS 10 | match configs.check_item: 11 | case "config": 12 | project_local_config = await find_project_config_dir(".") 13 | if project_local_config is None: 14 | print("Failed!", file=sys.stderr) 15 | return 1 16 | else: 17 | print(str(Path(project_local_config).parent), end="") 18 | return 0 19 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/init.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | return { 4 | chat = { 5 | ---@param subcommand sub_cmd 6 | ---@param opts VectorCode.CodeCompanion.ToolOpts 7 | ---@return CodeCompanion.Tools.Tool 8 | make_tool = function(subcommand, opts) 9 | local has = require("codecompanion").has 10 | if has ~= nil and has("function-calling") then 11 | return require( 12 | string.format("vectorcode.integrations.codecompanion.%s_tool", subcommand) 13 | )(opts) 14 | else 15 | error("Unsupported version of codecompanion!") 16 | end 17 | end, 18 | prompts = require("vectorcode.integrations.codecompanion.prompts"), 19 | }, 20 | } 21 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from vectorcode.cli_utils import Config, FilesAction 4 | 5 | logger = logging.getLogger(name=__name__) 6 | 7 | 8 | async def files(configs: Config) -> int: 9 | match configs.files_action: 10 | case FilesAction.ls: 11 | from vectorcode.subcommands.files import ls 12 | 13 | return await ls.ls(configs) 14 | case FilesAction.rm: 15 | from vectorcode.subcommands.files import rm 16 | 17 | return await rm.rm(configs) 18 | case _: 19 | logger.error( 20 | f"Unsupported subcommand for `vectorcode files`: {configs.action}" 21 | ) 22 | return 1 23 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/__init__.py: -------------------------------------------------------------------------------- 1 | from vectorcode.subcommands.check import check 2 | from vectorcode.subcommands.chunks import 
chunks 3 | from vectorcode.subcommands.clean import clean 4 | from vectorcode.subcommands.drop import drop 5 | from vectorcode.subcommands.files import files 6 | from vectorcode.subcommands.init import init 7 | from vectorcode.subcommands.ls import ls 8 | from vectorcode.subcommands.prompt import prompts 9 | from vectorcode.subcommands.query import query 10 | from vectorcode.subcommands.update import update 11 | from vectorcode.subcommands.vectorise import vectorise 12 | 13 | __all__ = [ 14 | "check", 15 | "chunks", 16 | "clean", 17 | "drop", 18 | "files", 19 | "init", 20 | "ls", 21 | "prompts", 22 | "query", 23 | "update", 24 | "vectorise", 25 | ] 26 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.common import ClientManager, get_collection, list_collection_files 6 | 7 | logger = logging.getLogger(name=__name__) 8 | 9 | 10 | async def ls(configs: Config) -> int: 11 | async with ClientManager().get_client(configs=configs) as client: 12 | try: 13 | collection = await get_collection(client, configs, False) 14 | except ValueError: 15 | logger.error(f"There's no existing collection at {configs.project_root}.") 16 | return 1 17 | paths = await list_collection_files(collection) 18 | if configs.pipe: 19 | print(json.dumps(list(paths))) 20 | else: 21 | for p in paths: 22 | print(p) 23 | return 0 24 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/heirline.lua: -------------------------------------------------------------------------------- 1 | ---@class VectorCode.Heirline.Opts: VectorCode.Lualine.Opts 2 | --- Other heirline component fields (like `hl`, `on_click`, `update`, etc.) 3 | ---@field component_opts table 4 | 5 | ---@type VectorCode.Heirline.Opts 6 | local default_opts = { show_job_count = false, component_opts = {} } 7 | 8 | ---@param opts VectorCode.Heirline.Opts? 9 | return function(opts) 10 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) --[[@as VectorCode.Heirline.Opts]] 11 | local lualine_comp = require("vectorcode.integrations").lualine(opts) 12 | local heirline_component = { 13 | provider = function(_) 14 | return lualine_comp[1]() 15 | end, 16 | condition = function(_) 17 | return lualine_comp.cond() 18 | end, 19 | } 20 | 21 | return vim.tbl_deep_extend("force", heirline_component, opts.component_opts) 22 | end 23 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/naive.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | from .base import RerankerBase 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | class NaiveReranker(RerankerBase): 13 | """This reranker uses the distances between the embedding vectors in the database for the queries and the chunks as the measure of relevance. 14 | No special configs required. 15 | configs.reranker_params will be ignored. 
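When no reranker is specified in the user configs, VectorCode falls back to this reranker by default.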
16 | """ 17 | 18 | def __init__(self, configs: Config, **kwargs: Any): 19 | super().__init__(configs) 20 | 21 | async def compute_similarity(self, results: list[QueryResult]): 22 | """ 23 | Do nothing, because the QueryResult objects already contain distances. 24 | """ 25 | pass 26 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: davidyz # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/drop.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from chromadb.errors import InvalidCollectionException 4 | 5 | from vectorcode.cli_utils import Config 6 | from vectorcode.common import ClientManager, get_collection 7 | 8 | logger = logging.getLogger(name=__name__) 9 | 10 | 11 | async def drop(config: Config) -> int: 12 | async with ClientManager().get_client(config) as client: 13 | try: 14 | collection = await get_collection(client, config) 15 | collection_path = collection.metadata["path"] 16 | await client.delete_collection(collection.name) 17 | print(f"Collection for {collection_path} has been deleted.") 18 | logger.info(f"Deteted collection at {collection_path}.") 19 | return 0 20 | except (ValueError, InvalidCollectionException) as e: 21 | logger.error( 22 | f"{e.__class__.__name__}: There's no existing collection for {config.project_root}" 23 | ) 24 | return 1 25 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/clean.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from chromadb.api import AsyncClientAPI 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.common import ClientManager, get_collections 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | async def run_clean_on_client(client: AsyncClientAPI, pipe_mode: bool): 13 | async for collection in get_collections(client): 14 | meta = collection.metadata 15 | logger.debug(f"{meta.get('path')}: {await collection.count()} chunk(s)") 16 | if await collection.count() == 0 or not os.path.isdir(meta["path"]): 17 | await client.delete_collection(collection.name) 18 | logger.info(f"Deleted collection for {meta['path']}") 19 | if not pipe_mode: 20 | print(f"Deleted {meta['path']}.") 21 | 22 | 23 | async def clean(configs: Config) -> int: 24 | async with 
ClientManager().get_client(configs) as client: 25 | await run_clean_on_client(client, configs.pipe) 26 | return 0 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 David 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/subcommands/files/test_files.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | 5 | from vectorcode.cli_utils import CliAction, Config, FilesAction 6 | from vectorcode.subcommands.files import files 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_files(): 11 | with patch( 12 | "vectorcode.subcommands.files.ls.ls", return_value=AsyncMock() 13 | ) as mock_ls: 14 | config = Config(action=CliAction.files, files_action=FilesAction.ls) 15 | await files(config) 16 | mock_ls.assert_called_with(config) 17 | with patch( 18 | "vectorcode.subcommands.files.rm.rm", return_value=AsyncMock() 19 | ) as mock_rm: 20 | config = Config(action=CliAction.files, files_action=FilesAction.rm) 21 | await files(config) 22 | mock_rm.assert_called_with(config) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_files_invalid_actions(): 27 | with patch("vectorcode.subcommands.files.logger") as mock_logger: 28 | config = Config(action=CliAction.files, files_action="foobar") 29 | assert await files(config) != 0 30 | mock_logger.error.assert_called_once() 31 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/files/rm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import cast 4 | 5 | from chromadb.types import Where 6 | 7 | from vectorcode.cli_utils import Config, expand_path 8 | from vectorcode.common import ClientManager, get_collection 9 | 10 | logger = logging.getLogger(name=__name__) 11 | 12 | 13 | async def rm(configs: Config) -> int: 14 | async with ClientManager().get_client(configs=configs) as client: 15 | try: 16 | collection = await get_collection(client, configs, False) 17 | except ValueError: 18 | logger.error(f"There's no existing collection at {configs.project_root}.") 19 | return 1 20 | paths = list( 21 | str(expand_path(p, True)) for p in 
configs.rm_paths if os.path.isfile(p) 22 | ) 23 | await collection.delete(where=cast(Where, {"path": {"$in": paths}})) 24 | if not configs.pipe: 25 | print(f"Removed {len(paths)} file(s).") 26 | if await collection.count() == 0: 27 | logger.warning( 28 | f"The collection at {configs.project_root} is now empty and will be removed." 29 | ) 30 | await client.delete_collection(collection.name) 31 | return 0 32 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/lualine.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | 3 | ---@class VectorCode.Lualine.Opts 4 | ---Whether to show the number of running async jobs. 5 | ---@field show_job_count boolean 6 | 7 | ---@param opts VectorCode.Lualine.Opts? 8 | return function(opts) 9 | opts = vim.tbl_deep_extend("force", { show_job_count = false }, opts or {}) --[[@as VectorCode.Lualine.Opts]] 10 | local cacher = vc_config.get_cacher_backend() 11 | return { 12 | function() 13 | local message = "VectorCode: " 14 | if cacher.buf_is_enabled(0) then 15 | local retrieval = cacher.query_from_cache(0, { notify = false }) 16 | if retrieval then 17 | message = message .. tostring(#retrieval) 18 | end 19 | local job_count = cacher.buf_job_count(0) 20 | if job_count > 0 then 21 | if opts.show_job_count then 22 | message = message .. (" (%d) "):format(job_count) 23 | else 24 | message = message .. "  " 25 | end 26 | else 27 | message = message .. "  " 28 | end 29 | else 30 | message = message .. " " 31 | end 32 | return message 33 | end, 34 | cond = function() 35 | return cacher.buf_is_registered() 36 | end, 37 | } 38 | end 39 | -------------------------------------------------------------------------------- /tests/subcommands/test_prompts.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import sys 4 | 5 | from vectorcode.cli_utils import Config, PromptCategory 6 | from vectorcode.subcommands import prompt 7 | 8 | 9 | def test_prompts_pipe_true(): 10 | configs = Config(pipe=True, prompt_categories=PromptCategory) 11 | 12 | # Mock stdout 13 | captured_output = io.StringIO() 14 | sys.stdout = captured_output 15 | 16 | return_code = prompt.prompts(configs) 17 | 18 | sys.stdout = sys.__stdout__ # Reset stdout 19 | 20 | expected_output = ( 21 | json.dumps(sorted(sum(prompt.prompt_by_categories.values(), start=[]))) + "\n" 22 | ) 23 | assert captured_output.getvalue() == expected_output 24 | assert return_code == 0 25 | 26 | 27 | def test_prompts_pipe_false(): 28 | configs = Config(pipe=False, prompt_categories=PromptCategory) 29 | 30 | # Mock stdout 31 | captured_output = io.StringIO() 32 | sys.stdout = captured_output 33 | 34 | return_code = prompt.prompts(configs) 35 | 36 | sys.stdout = sys.__stdout__ # Reset stdout 37 | 38 | expected_output = "" 39 | for i in sorted(sum(prompt.prompt_by_categories.values(), start=[])): 40 | expected_output += f"- {i}\n" 41 | 42 | assert captured_output.getvalue() == expected_output 43 | assert return_code == 0 44 | -------------------------------------------------------------------------------- /tests/subcommands/test_check.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from vectorcode.cli_utils import CHECK_OPTIONS, Config 6 | from vectorcode.subcommands import check 7 | 8 | 9 | @pytest.mark.asyncio 10 | async 
def test_check_config_success(capsys, tmp_path): 11 | # Create a temporary .vectorcode directory 12 | project_root = tmp_path / ".vectorcode" 13 | project_root.mkdir() 14 | 15 | config = Config(check_item="config") 16 | 17 | with patch("os.getcwd", return_value=str(tmp_path)): 18 | result = await check(config) 19 | captured = capsys.readouterr() 20 | 21 | assert result == 0 22 | assert str(tmp_path) == captured.out 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_check_config_failure(capsys, tmp_path): 27 | # Ensure no .vectorcode directory exists 28 | config = Config(check_item="config") 29 | with patch("os.getcwd", return_value=str(tmp_path)): 30 | result = await check(config) 31 | captured = capsys.readouterr() 32 | 33 | assert result == 1 34 | assert "Failed!" in captured.err 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_check_invalid_check_item(): 39 | config = Config(check_item="invalid_item") 40 | with pytest.raises(AssertionError): 41 | await check(config) 42 | 43 | 44 | def test_check_options(): 45 | assert "config" in CHECK_OPTIONS 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. ... 16 | 2. ... 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **VectorCode Configuration** 22 | Please attach your `/.vectorcode/config.json` or 23 | `~/.config/vectorcode/config.json` here. 24 | ```json 25 | 26 | ``` 27 | 28 | For issues with the Neovim plugin, please also attach your `setup` options: 29 | ```lua 30 | 31 | ``` 32 | If it only occurs when you use VectorCode with a particular plugin, please 33 | attach the relevant config here: 34 | ```lua 35 | 36 | ``` 37 | 38 | **Platform information:** 39 | - If the issue is about the CLI, attach a list of packages in the Python virtual environment: 40 | - for `pipx`, run `pipx runpip vectorcode freeze`; 41 | - for `uv`, run `uv tool run --from=vectorcode python -m ensurepip && uv tool run --from=vectorcode python -m pip freeze`. 42 | ``` 43 | 44 | ``` 45 | - If the issue is about the neovim plugin, attach the neovim version you're using: 46 | 47 | 48 | **System Information:** 49 | 50 | > For Mac users, please also mention whether you're using intel or apple silicon devices. 51 | 52 | - OS: Linux, MacOS, Windows... 53 | 54 | **Additional context** 55 | Add any other context about the problem here. Please attach 56 | [CLI logs](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 57 | or 58 | [nvim plugin logs](https://github.com/Davidyz/VectorCode/blob/main/docs/neovim.md#debugging-and-logging) 59 | if applicable. 
60 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/cross_encoder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from vectorcode.cli_utils import Config 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | from .base import RerankerBase 8 | 9 | logger = logging.getLogger(name=__name__) 10 | 11 | 12 | class CrossEncoderReranker(RerankerBase): 13 | """This reranker uses [`CrossEncoder` from the sentence_transformers library](https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html) for reranking. 14 | Parameters in configs.params will be passed to the `CrossEncoder` class in the `sentence_transformers` library. 15 | The default model is 'cross-encoder/ms-marco-MiniLM-L-6-v2'. 16 | Consult sentence_transformers documentation for details on the available parameters. 17 | """ 18 | 19 | def __init__( 20 | self, 21 | configs: Config, 22 | **kwargs: Any, 23 | ): 24 | super().__init__(configs) 25 | from sentence_transformers import CrossEncoder 26 | 27 | if configs.reranker_params.get("model_name_or_path") is None: 28 | logger.warning( 29 | "'model_name_or_path' is not set. Fallback to 'cross-encoder/ms-marco-MiniLM-L-6-v2'" 30 | ) 31 | configs.reranker_params["model_name_or_path"] = ( 32 | "cross-encoder/ms-marco-MiniLM-L-6-v2" 33 | ) 34 | model_name = configs.reranker_params.pop("model_name_or_path") 35 | self.model = CrossEncoder(model_name, **configs.reranker_params) 36 | 37 | async def compute_similarity(self, results: list[QueryResult]): 38 | scores = self.model.predict([(str(res.chunk), res.query[0]) for res in results]) 39 | 40 | for res, score in zip(results, scores): 41 | res.scores = (score,) 42 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/cmd.lua: -------------------------------------------------------------------------------- 1 | ---@type VectorCode.JobRunner 2 | local runner = {} 3 | 4 | ---@type table 5 | local jobs = {} 6 | local logger = require("vectorcode.config").logger 7 | 8 | function runner.run_async(args, callback, bufnr) 9 | if type(callback) == "function" then 10 | callback = vim.schedule_wrap(callback) 11 | else 12 | callback = nil 13 | end 14 | logger.debug( 15 | ("cmd jobrunner for buffer %s args: %s"):format(bufnr, vim.inspect(args)) 16 | ) 17 | 18 | table.insert( 19 | args, 20 | 1, 21 | require("vectorcode.config").get_user_config().cli_cmds.vectorcode 22 | ) 23 | 24 | ---@type vim.SystemObj? 
25 | local job 26 | job = vim.system(args, {}, function(out) 27 | if job and job.pid then 28 | jobs[job.pid] = job 29 | end 30 | local stdout = out.stdout or "{}" 31 | if stdout == "" then 32 | stdout = "{}" 33 | end 34 | local _, decoded = pcall(vim.json.decode, stdout, { object = true, array = true }) 35 | if type(callback) == "function" then 36 | callback(decoded or {}, out.stderr, out.code, out.signal) 37 | end 38 | end) 39 | jobs[job.pid] = job 40 | return tonumber(job.pid) 41 | end 42 | 43 | function runner.run(args, timeout_ms, bufnr) 44 | if timeout_ms == nil or timeout_ms < 0 then 45 | timeout_ms = 2 ^ 31 - 1 46 | end 47 | local res, err, code, signal 48 | local pid = runner.run_async(args, function(result, error, e_code, s) 49 | res = result 50 | err = error 51 | code = e_code 52 | signal = s 53 | end, bufnr) 54 | if pid ~= nil and jobs[pid] ~= nil then 55 | jobs[pid]:wait(timeout_ms) 56 | end 57 | return res or {}, err, code, signal 58 | end 59 | 60 | function runner.is_job_running(job) 61 | return jobs[job] ~= nil 62 | end 63 | 64 | function runner.stop_job(job_handle) 65 | local job = jobs[job_handle] 66 | if job ~= nil then 67 | job:kill(15) 68 | end 69 | end 70 | 71 | return runner 72 | -------------------------------------------------------------------------------- /lua/vectorcode/cacher/init.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | local jobrunner = require("vectorcode.jobrunner.cmd") 3 | 4 | return { 5 | lsp = require("vectorcode.cacher.lsp"), 6 | default = require("vectorcode.cacher.default"), 7 | utils = { 8 | ---Checks if VectorCode has been configured properly for your project. 9 | ---See the CLI manual for details. 10 | ---@param check_item string? 11 | ---@param on_success fun(out: vim.SystemCompleted)? 12 | ---@param on_failure fun(out: vim.SystemCompleted?)? 13 | async_check = function(check_item, on_success, on_failure) 14 | if not vc_config.has_cli() then 15 | if on_failure ~= nil then 16 | on_failure() 17 | end 18 | return 19 | end 20 | check_item = check_item or "config" 21 | jobrunner.run_async( 22 | { "check", check_item }, 23 | function(result, _error, code, signal) 24 | local out_msg = nil 25 | if type(result) == "table" and #result > 0 then 26 | out_msg = table.concat(vim.iter(result):flatten(math.huge):totable()) 27 | elseif type(result) == "string" then 28 | out_msg = result 29 | end 30 | 31 | local err_msg = nil 32 | if type(_error) == "table" and #_error > 0 then 33 | err_msg = table.concat(vim.iter(_error):flatten(math.huge):totable()) 34 | elseif type(_error) == "string" then 35 | out_msg = _error 36 | end 37 | 38 | local out = { 39 | stdout = out_msg, 40 | stderr = err_msg, 41 | code = code, 42 | signal = signal, 43 | } 44 | if out.code == 0 and type(on_success) == "function" then 45 | vim.schedule_wrap(on_success)(out) 46 | elseif out.code ~= 0 and type(on_failure) == "function" then 47 | vim.schedule_wrap(on_failure)(out) 48 | end 49 | end, 50 | 0 51 | ) 52 | end, 53 | }, 54 | } 55 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - '[0-9]+.[0-9]+.[0-9]+' 5 | 6 | jobs: 7 | pypi-publish: 8 | name: upload release to PyPI 9 | runs-on: ubuntu-latest 10 | permissions: 11 | # This permission is needed for private repositories. 
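# It is also required by the release step below, which creates the GitHub release and uploads the built artifacts.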
12 | contents: write 13 | # IMPORTANT: this permission is mandatory for trusted publishing 14 | id-token: write 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - uses: pdm-project/setup-pdm@v4 19 | 20 | - name: Publish package distributions to PyPI 21 | run: pdm publish 22 | 23 | - name: Generate Changelog 24 | id: changelog 25 | uses: mikepenz/release-changelog-builder-action@v5 26 | with: 27 | mode: "PR" 28 | configurationJson: | 29 | { 30 | "categories": [ 31 | { 32 | "title": "## 💥 Breaking Change", 33 | "labels": ["breaking"] 34 | }, 35 | { 36 | "title": "## 🚀 Features", 37 | "labels": ["feature", "enhancement"] 38 | }, 39 | { 40 | "title": "## 🐛 Fixes", 41 | "labels": ["fix", "bug"] 42 | }, 43 | { 44 | "title": "## 🧪 Tests", 45 | "labels": ["test"] 46 | }, 47 | { 48 | "title": "## 📖 Documentation", 49 | "labels": ["documentation"] 50 | } 51 | ], 52 | } 53 | 54 | token: ${{ secrets.GITHUB_TOKEN }} 55 | 56 | - name: Create Release 57 | uses: ncipollo/release-action@v1.16.0 58 | with: 59 | draft: false 60 | makeLatest: true 61 | name: Release ${{ github.ref_name }} 62 | body: ${{ steps.changelog.outputs.changelog }} 63 | token: ${{ secrets.GITHUB_TOKEN }} 64 | artifacts: "./dist/*" 65 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/common.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local job_runner 4 | local vc_config = require("vectorcode.config") 5 | local notify_opts = vc_config.notify_opts 6 | local logger = vc_config.logger 7 | 8 | local TOOL_RESULT_SOURCE = "VectorCodeToolResult" 9 | 10 | return { 11 | tool_result_source = TOOL_RESULT_SOURCE, 12 | 13 | ---@param t table|string|nil 14 | ---@return string 15 | flatten_table_to_string = function(t) 16 | vim.deprecate( 17 | "vectorcode.integrations.codecompanion.common.flatten_table_to_string", 18 | "vectorcode.utils.flatten_table_to_string", 19 | "1.0.0", 20 | "vectorcode", 21 | true 22 | ) 23 | return require("vectorcode.utils").flatten_table_to_string(t) 24 | end, 25 | 26 | ---@param use_lsp boolean 27 | ---@return VectorCode.JobRunner 28 | initialise_runner = function(use_lsp) 29 | if job_runner == nil then 30 | if use_lsp then 31 | job_runner = require("vectorcode.jobrunner.lsp") 32 | end 33 | if job_runner == nil then 34 | job_runner = require("vectorcode.jobrunner.cmd") 35 | logger.info("Using cmd runner for CodeCompanion tool.") 36 | if use_lsp then 37 | vim.schedule_wrap(vim.notify)( 38 | "Failed to initialise the LSP runner. Falling back to cmd runner.", 39 | vim.log.levels.WARN, 40 | notify_opts 41 | ) 42 | end 43 | else 44 | logger.info("Using LSP runner for CodeCompanion tool.") 45 | end 46 | end 47 | return job_runner 48 | end, 49 | 50 | ---Convert `path` to a relative path if it's within the current project. 51 | ---When `base` is `nil`, this function will attempt to find a project root 52 | ---or use `cwd`. 53 | ---@param path string 54 | ---@param base? string 55 | ---@return string 56 | cleanup_path = function(path, base) 57 | base = base or vim.fs.root(0, { ".vectorcode", ".git" }) or vim.uv.cwd() or "." 
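    -- vim.fs.relpath() returns nil when `path` is not inside `base`, so such paths are returned unchanged.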
58 | return vim.fs.relpath(base, path) or path 59 | end, 60 | } 61 | -------------------------------------------------------------------------------- /src/vectorcode/debugging.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import cProfile 3 | import logging 4 | import os 5 | import pstats 6 | from datetime import datetime 7 | 8 | __LOG_DIR = os.path.expanduser("~/.local/share/vectorcode/logs/") 9 | 10 | logger = logging.getLogger(name=__name__) 11 | 12 | __profiler: cProfile.Profile | None = None 13 | 14 | 15 | def _ensure_log_dir(): 16 | """Ensure the log directory exists""" 17 | os.makedirs(__LOG_DIR, exist_ok=True) 18 | 19 | 20 | def finish(): 21 | """Clean up profiling and save results""" 22 | if __profiler is not None: 23 | try: 24 | __profiler.disable() 25 | stats_file = os.path.join( 26 | __LOG_DIR, 27 | f"cprofile-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.stats", 28 | ) 29 | __profiler.dump_stats(stats_file) 30 | print(f"cProfile stats saved to: {stats_file}") 31 | 32 | # Print summary stats 33 | stats = pstats.Stats(__profiler) 34 | stats.sort_stats("cumulative") 35 | stats.print_stats(20) 36 | except Exception as e: 37 | logger.warning(f"Failed to save cProfile output: {e}") 38 | 39 | 40 | def enable(): 41 | """Enable cProfile-based profiling and crash debugging""" 42 | global __profiler 43 | 44 | try: 45 | _ensure_log_dir() 46 | 47 | # Initialize cProfile for comprehensive profiling 48 | __profiler = cProfile.Profile() 49 | __profiler.enable() 50 | atexit.register(finish) 51 | logger.info("cProfile profiling enabled successfully") 52 | 53 | try: 54 | import coredumpy # noqa: F401 55 | 56 | logger.info("coredumpy crash debugging enabled successfully") 57 | coredumpy.patch_except(directory=__LOG_DIR) 58 | except Exception as e: 59 | logger.warning( 60 | f"Crash debugging will not be available. 
Failed to import coredumpy: {e}" 61 | ) 62 | 63 | except Exception as e: 64 | logger.error(f"Failed to initialize cProfile: {e}") 65 | logger.warning("Profiling will not be available for this session") 66 | -------------------------------------------------------------------------------- /tests/subcommands/test_drop.py: -------------------------------------------------------------------------------- 1 | from contextlib import asynccontextmanager 2 | from unittest.mock import AsyncMock, patch 3 | 4 | import pytest 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.subcommands.drop import drop 8 | 9 | 10 | @pytest.fixture 11 | def mock_config(): 12 | config = Config( 13 | project_root="/path/to/project", 14 | ) # Removed positional args 15 | return config 16 | 17 | 18 | @pytest.fixture 19 | def mock_client(): 20 | return AsyncMock() 21 | 22 | 23 | @pytest.fixture 24 | def mock_collection(): 25 | collection = AsyncMock() 26 | collection.name = "test_collection" 27 | collection.metadata = {"path": "/path/to/project"} 28 | return collection 29 | 30 | 31 | @pytest.mark.asyncio 32 | async def test_drop_success(mock_config, mock_client, mock_collection): 33 | mock_client.get_collection.return_value = mock_collection 34 | mock_client.delete_collection = AsyncMock() 35 | with ( 36 | patch("vectorcode.subcommands.drop.ClientManager") as MockClientManager, 37 | patch( 38 | "vectorcode.subcommands.drop.get_collection", return_value=mock_collection 39 | ), 40 | ): 41 | mock_client = AsyncMock() 42 | 43 | @asynccontextmanager 44 | async def _get_client(self, config=None, need_lock=True): 45 | yield mock_client 46 | 47 | mock_client_manager = MockClientManager.return_value 48 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 49 | mock_client_manager.get_client = _get_client 50 | 51 | result = await drop(mock_config) 52 | assert result == 0 53 | mock_client.delete_collection.assert_called_once_with(mock_collection.name) 54 | 55 | 56 | @pytest.mark.asyncio 57 | async def test_drop_collection_not_found(mock_config, mock_client): 58 | mock_client.get_collection.side_effect = ValueError("Collection not found") 59 | with patch("vectorcode.subcommands.drop.ClientManager"): 60 | with patch( 61 | "vectorcode.subcommands.drop.get_collection", 62 | side_effect=ValueError("Collection not found"), 63 | ): 64 | result = await drop(mock_config) 65 | assert result == 1 66 | -------------------------------------------------------------------------------- /.github/workflows/test_and_cov.yml: -------------------------------------------------------------------------------- 1 | name: Test and Coverage 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | python-version: [3.11, 3.12, 3.13] 16 | os: [ubuntu-latest, macos-latest, macos-14, ubuntu-24.04-arm] 17 | exclude: 18 | - os: macos-14 19 | python-version: 3.13 20 | runs-on: ${{ matrix.os }} 21 | timeout-minutes: 60 22 | env: 23 | COREDUMPY_DUMP_DIR: ${{ github.workspace }}/coredumpy_data 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: setup pdm 28 | uses: pdm-project/setup-pdm@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Install uv 33 | uses: astral-sh/setup-uv@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | 37 | - name: Configure pdm to use uv 38 | run: pdm config use_uv true 39 | 40 | - name: install pdm and dependencies 41 | run: make deps 42 | 43 | - name: Set 
custom HF cache directory 44 | run: | 45 | export HF_HOME="$GITHUB_WORKSPACE/hf_cache" 46 | export SENTENCE_TRANSFORMERS_HOME="$HF_HOME" 47 | mkdir -p "$HF_HOME" 48 | [ -z "$(ls "$HF_HOME")" ] || rm "${HF_HOME:?}/*" -rf && true 49 | 50 | - name: run tests 51 | run: pdm run pytest --enable-coredumpy --coredumpy-dir ${{ env.COREDUMPY_DUMP_DIR }} 52 | 53 | - name: run coverage 54 | run: | 55 | pdm run coverage run -m pytest 56 | pdm run coverage report -m 57 | pdm run coverage xml -i 58 | 59 | - name: static analysis by basedpyright 60 | run: pdm run basedpyright 61 | 62 | - name: upload coverage reports to codecov 63 | uses: codecov/codecov-action@v5 64 | with: 65 | token: ${{ secrets.codecov_token }} 66 | 67 | - name: Upload coredumpy data if applicable 68 | uses: gaogaotiantian/upload-coredumpy@v0.2 69 | if: failure() 70 | with: 71 | name: coredumpy_data_${{ matrix.os }}_${{ matrix.python-version }} 72 | path: ${{ env.COREDUMPY_DUMP_DIR }} 73 | retention-days: 7 74 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from typing import Type 4 | 5 | from vectorcode.cli_utils import Config 6 | 7 | from .base import RerankerBase 8 | from .cross_encoder import CrossEncoderReranker 9 | from .naive import NaiveReranker 10 | 11 | __all__ = ["RerankerBase", "NaiveReranker", "CrossEncoderReranker"] 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | __supported_rerankers: dict[str, Type[RerankerBase]] = { 16 | "CrossEncoderReranker": CrossEncoderReranker, 17 | "NaiveReranker": NaiveReranker, 18 | } 19 | 20 | 21 | class RerankerError(Exception): 22 | pass 23 | 24 | 25 | class RerankerInitialisationError(RerankerError): 26 | pass 27 | 28 | 29 | def add_reranker(cls): 30 | """ 31 | This is a class decorator that allows you to add a custom reranker that can be 32 | recognised by the `get_reranker` function. 33 | 34 | Your reranker should inherit `RerankerBase` and be decorated by `add_reranker`: 35 | ```python 36 | @add_reranker 37 | class CustomReranker(RerankerBase): 38 | # override the methods according to your need. 39 | ``` 40 | """ 41 | if issubclass(cls, RerankerBase): 42 | if __supported_rerankers.get(cls.__name__): 43 | error_message = f"{cls.__name__} has been registered." 
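        # A duplicate class name is rejected here instead of silently overwriting the existing registration.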
44 | raise AttributeError(error_message) 45 | __supported_rerankers[cls.__name__] = cls 46 | return cls 47 | else: 48 | error_message = f'{cls} should be a subclass of "RerankerBase"' 49 | raise TypeError(error_message) 50 | 51 | 52 | def get_available_rerankers(): 53 | return list(__supported_rerankers.values()) 54 | 55 | 56 | def get_reranker(configs: Config) -> RerankerBase: 57 | if configs.reranker: 58 | if hasattr(sys.modules[__name__], configs.reranker): 59 | # dynamic dispatch for built-in rerankers 60 | return getattr(sys.modules[__name__], configs.reranker).create(configs) 61 | 62 | elif issubclass( 63 | __supported_rerankers.get(configs.reranker, type(None)), RerankerBase 64 | ): 65 | return __supported_rerankers[configs.reranker].create(configs) 66 | 67 | if not configs.reranker: 68 | return NaiveReranker(configs) 69 | else: 70 | raise RerankerInitialisationError() 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "VectorCode" 3 | dynamic = ["version"] 4 | description = "A tool to vectorise repositories for RAG." 5 | authors = [{ name = "Davidyz", email = "hzjlyz@gmail.com" }] 6 | dependencies = [ 7 | "chromadb<=0.6.3", 8 | "sentence-transformers", 9 | "pathspec", 10 | "tabulate", 11 | "shtab", 12 | "numpy", 13 | "psutil", 14 | "httpx", 15 | "tree-sitter!=0.25.0", 16 | "tree-sitter-language-pack", 17 | "pygments", 18 | "transformers>=4.36.0,!=4.51.0,!=4.51.1,!=4.51.2", 19 | "wheel<0.46.0", 20 | "colorlog", 21 | "charset-normalizer>=3.4.1", 22 | "json5", 23 | "posthog<6.0.0", 24 | "filelock>=3.15.0", 25 | ] 26 | requires-python = ">=3.11,<3.14" 27 | readme = "README.md" 28 | license = { text = "MIT" } 29 | 30 | [project.urls] 31 | homepage = "https://github.com/Davidyz/VectorCode" 32 | github = "https://github.com/Davidyz/VectorCode" 33 | documentation = "https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md" 34 | 35 | [project.scripts] 36 | vectorcode = "vectorcode.main:main" 37 | vectorcode-server = "vectorcode.lsp_main:main" 38 | vectorcode-mcp-server = "vectorcode.mcp_main:main" 39 | 40 | [build-system] 41 | requires = ["pdm-backend"] 42 | build-backend = "pdm.backend" 43 | 44 | [tool.coverage.run] 45 | omit = [ 46 | "./tests/*", 47 | "src/vectorcode/_version.py", 48 | "src/vectorcode/__init__.py", 49 | "src/vectorcode/debugging.py", 50 | "/tmp/*", 51 | ] 52 | include = ['src/vectorcode/**/*.py'] 53 | 54 | 55 | [tool.pdm] 56 | distribution = true 57 | 58 | [tool.pdm.version] 59 | source = "scm" 60 | write_to = "./vectorcode/_version.py" 61 | write_template = "__version__ = '{}' # pragma: no cover" 62 | 63 | [dependency-groups] 64 | dev = [ 65 | "ipython>=8.31.0", 66 | "ruff>=0.9.1", 67 | "pre-commit>=4.0.1", 68 | "pytest>=8.3.4", 69 | "pdm-backend>=2.4.3", 70 | "coverage>=7.6.12", 71 | "pytest-asyncio>=0.25.3", 72 | "debugpy>=1.8.12", 73 | "basedpyright>=1.29.2", 74 | ] 75 | 76 | [project.optional-dependencies] 77 | legacy = ["numpy<2.0.0", "torch==2.2.2", "transformers<=4.49.0"] 78 | intel = ['optimum[openvino]', 'openvino'] 79 | lsp = ['pygls<2.0.0', 'lsprotocol'] 80 | mcp = ['mcp<2.0.0', 'pydantic'] 81 | debug = ["coredumpy>=0.4.1"] 82 | 83 | [tool.basedpyright] 84 | typeCheckingMode = "standard" 85 | ignore = ["./tests/"] 86 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/ls.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import socket 5 | 6 | import tabulate 7 | from chromadb.api import AsyncClientAPI 8 | from chromadb.api.types import IncludeEnum 9 | 10 | from vectorcode.cli_utils import Config, cleanup_path 11 | from vectorcode.common import ClientManager, get_collections 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | 16 | async def get_collection_list(client: AsyncClientAPI) -> list[dict]: 17 | result = [] 18 | async for collection in get_collections(client): 19 | meta = collection.metadata 20 | document_meta = await collection.get(include=[IncludeEnum.metadatas]) 21 | unique_files = set( 22 | i.get("path") for i in (document_meta["metadatas"] or []) if i is not None 23 | ) 24 | result.append( 25 | { 26 | "project-root": cleanup_path(meta["path"]), 27 | "user": meta.get("username"), 28 | "hostname": socket.gethostname(), 29 | "collection_name": collection.name, 30 | "size": await collection.count(), 31 | "embedding_function": meta["embedding_function"], 32 | "num_files": len(unique_files), 33 | } 34 | ) 35 | return result 36 | 37 | 38 | async def ls(configs: Config) -> int: 39 | async with ClientManager().get_client(configs) as client: 40 | result: list[dict] = await get_collection_list(client) 41 | logger.info(f"Found the following collections: {result}") 42 | 43 | if configs.pipe: 44 | print(json.dumps(result)) 45 | else: 46 | table = [] 47 | for meta in result: 48 | project_root = meta["project-root"] 49 | if os.environ.get("HOME"): 50 | project_root = project_root.replace(os.environ["HOME"], "~") 51 | row = [ 52 | project_root, 53 | meta["size"], 54 | meta["num_files"], 55 | meta["embedding_function"], 56 | ] 57 | table.append(row) 58 | print( 59 | tabulate.tabulate( 60 | table, 61 | headers=[ 62 | "Project Root", 63 | "Collection Size", 64 | "Number of Files", 65 | "Embedding Function", 66 | ], 67 | ) 68 | ) 69 | return 0 70 | -------------------------------------------------------------------------------- /plugin/vectorcode.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | local notify_opts = vc_config.notify_opts 3 | 4 | ---@param args string[]? 
5 | ---@return table 6 | local function process_args(args) 7 | if args == nil then 8 | return {} 9 | end 10 | local result = {} 11 | for _, str in pairs(args) do 12 | local equal = string.find(str, "=") 13 | if equal then 14 | local key = string.sub(str, 1, equal - 1) 15 | local value = string.sub(str, equal + 1) --[[@as any]] 16 | result[key] = value 17 | end 18 | end 19 | return result 20 | end 21 | 22 | vim.api.nvim_create_user_command("VectorCode", function(args) 23 | local cacher = vc_config.get_cacher_backend() 24 | local splitted_args = vim.tbl_filter(function(str) 25 | return str ~= nil and str ~= "" 26 | end, vim.split(args.args, " ")) 27 | local action = table.remove(splitted_args, 1) 28 | if action == "register" then 29 | local bufnr = vim.api.nvim_get_current_buf() 30 | cacher.register_buffer(bufnr, { 31 | run_on_register = true, 32 | project_root = process_args(splitted_args).project_root, 33 | }) 34 | vim.notify( 35 | ("Buffer %d has been registered for VectorCode."):format(bufnr), 36 | vim.log.levels.INFO, 37 | notify_opts 38 | ) 39 | elseif action == "deregister" then 40 | local buf_nr = vim.api.nvim_get_current_buf() 41 | cacher.deregister_buffer(buf_nr, { notify = true }) 42 | else 43 | vim.notify( 44 | ([[Command "VectorCode %s" was not recognised.]]):format(args.args), 45 | vim.log.levels.ERROR, 46 | notify_opts 47 | ) 48 | end 49 | end, { 50 | nargs = 1, 51 | complete = function(arglead, cmd, _) 52 | local cacher = vc_config.get_cacher_backend() 53 | local splitted_cmd = vim.tbl_filter(function(str) 54 | return str ~= nil and str ~= "" 55 | end, vim.split(cmd, " ")) 56 | 57 | if #splitted_cmd < 2 then 58 | if cacher.buf_is_registered(0) then 59 | return { "register", "deregister" } 60 | else 61 | return { "register" } 62 | end 63 | elseif #splitted_cmd == 2 and splitted_cmd[2] == "register" then 64 | return { "project_root=" } 65 | elseif splitted_cmd[2] == "register" and #splitted_cmd == 3 then 66 | local prefix = "project_root=" 67 | if string.find(splitted_cmd[3], prefix) == 1 then 68 | local partial = arglead:sub(#prefix + 1) 69 | local dirs = vim.fn.getcompletion(partial, "dir") 70 | for i = 1, #dirs do 71 | dirs[i] = prefix .. 
dirs[i] 72 | end 73 | return dirs 74 | end 75 | end 76 | end, 77 | }) 78 | -------------------------------------------------------------------------------- /tests/subcommands/query/test_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tree_sitter import Point 3 | 4 | from vectorcode.chunking import Chunk 5 | from vectorcode.subcommands.query.types import QueryResult 6 | 7 | 8 | def make_dummy_chunk(): 9 | return QueryResult( 10 | path="dummy1.py", 11 | chunk=Chunk( 12 | text="hello", start=Point(row=1, column=0), end=Point(row=1, column=4) 13 | ), 14 | query=["hello"], 15 | scores=[0.9], 16 | ) 17 | 18 | 19 | def test_QueryResult_merge(): 20 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 21 | res2.query = ["bye"] 22 | res2.scores = [0.1] 23 | 24 | merged = QueryResult.merge(res1, res2) 25 | assert merged.path == res1.path 26 | assert merged.chunk == res1.chunk 27 | assert merged.mean_score() == 0.5 28 | assert merged.query == ("hello", "bye") 29 | 30 | 31 | def test_QueryResult_merge_failed(): 32 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 33 | res2.path = "dummy2.py" 34 | with pytest.raises(ValueError): 35 | QueryResult.merge(res1, res2) 36 | 37 | 38 | def test_QueryResult_group_by_path(): 39 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 40 | res2.chunk = Chunk( 41 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 42 | ) 43 | res2.query = ["bye"] 44 | res2.scores = [0.1] 45 | 46 | grouped_dict = QueryResult.group(res1, res2) 47 | assert len(grouped_dict.keys()) == 1 48 | assert len(grouped_dict["dummy1.py"]) == 2 49 | 50 | 51 | def test_QueryResult_group_by_chunk(): 52 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 53 | res2.query = ["bye"] 54 | res2.scores = [0.1] 55 | 56 | grouped_dict = QueryResult.group(res1, res2, by="chunk") 57 | assert len(grouped_dict.keys()) == 1 58 | assert len(grouped_dict[res1.chunk]) == 2 59 | 60 | 61 | def test_QueryResult_group_top_k(): 62 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 63 | res2.chunk = Chunk( 64 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 65 | ) 66 | res2.query = ["bye"] 67 | res2.scores = [0.1] 68 | 69 | grouped_dict = QueryResult.group(res1, res2, top_k=1) 70 | assert len(grouped_dict.keys()) == 1 71 | assert len(grouped_dict["dummy1.py"]) == 1 72 | assert grouped_dict["dummy1.py"][0].query[0] == "hello" 73 | 74 | 75 | def test_QueryResult_lt(): 76 | res1, res2 = (make_dummy_chunk(), make_dummy_chunk()) 77 | res2.chunk = Chunk( 78 | "hello", start=Point(row=2, column=0), end=Point(row=2, column=4) 79 | ) 80 | res2.query = ["bye"] 81 | res2.scores = [0.1] 82 | assert res2 < res1 83 | -------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Before You Start 2 | 3 | As much as I appreciate everyone who wish to contribute to this project, if 4 | you want to submit a new feature/refactoring that is more than a few lines, 5 | it'll be better if you open an issue or discussion before you start working. 6 | This allows us to thoroughly exchange ideas before people invest too much time, 7 | and will help me maintain the codebase in the long run. 8 | 9 | # Technical Stuff 10 | 11 | This project uses [pre-commit](https://pre-commit.com/) to perform some 12 | formatting and linting. 
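If you haven't used it before, pre-commit can typically be installed with `pipx install pre-commit` (or `pip install pre-commit`), and running `pre-commit install` once inside the repository sets up the git hooks so these checks run on every commit.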
If you're 13 | contributing to this project, having it on your system will help you write code 14 | that passes the CI. 15 | You can also see 16 | [.pre-commit-config.yaml](https://github.com/Davidyz/VectorCode/blob/main/.pre-commit-config.yaml) 17 | for a list of hooks enabled for the repo. 18 | 19 | ## Python CLI 20 | 21 | The development and publication of this tool are managed by 22 | [pdm](https://pdm-project.org/en/latest/). 23 | 24 | Once you've cloned and `cd`ed into the repo, run `make deps`. This will call 25 | some `pdm` commands to install development dependencies. Some of them are 26 | actually optional, but for convenience I decided to leave them here. This will 27 | include [pytest](https://docs.pytest.org/en/stable/), the testing framework, 28 | and [coverage.py](https://coverage.readthedocs.io/en/7.7.1/), the coverage 29 | report tool. If you're not familiar with pytest or coverage.py, you can run `make test` to 30 | run tests on all Python code, and `make coverage` to generate a coverage report. 31 | The tests and coverage report are also part of the CI configuration, but it might 32 | still help to run them locally before you open the PR. 33 | 34 | This project also runs static analysis with 35 | [basedpyright](https://docs.basedpyright.com). GitHub Actions will also run the 36 | check when a PR is submitted. Both this and `ruff check` are included 37 | in `make lint`. 38 | 39 | You may also find it helpful to 40 | [enable logging](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 41 | for the CLI when developing new features or working on fixes. 42 | 43 | ## Neovim Plugin 44 | 45 | At the moment, there isn't much to cover here. As long as the code is 46 | formatted (stylua) and appropriately type-annotated, you're good. I do have 47 | plans to write some tests, but before that happens, formatting and type 48 | annotations are the only things that you need to take special care of. 49 | 50 | The Lua codebase is linted by [selene](https://github.com/Kampfkarren/selene). 51 | You may run `make lint` or call `selene` from the CLI to lint the code. 52 | 53 | You may find it useful to 54 | [enable logging](https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#debugging-and-diagnosing) 55 | when you're poking around the codebase. 56 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/init.lua: -------------------------------------------------------------------------------- 1 | local utils = require("vectorcode.utils") 2 | 3 | ---@alias VectorCode.JobRunner.Callback fun(result: table|nil, error: table|nil, code:integer, signal: integer?) 4 | 5 | --- A class for calling vectorcode commands that aims to provide a unified API for both the LSP and command-line backends. 6 | --- Implementations exist for both direct command-line execution (`cmd.lua`) and LSP (`lsp.lua`). 7 | --- For the format of the `result`, see https://github.com/Davidyz/VectorCode/blob/main/docs/cli.md#for-developers 8 | ---@class VectorCode.JobRunner 9 | --- Runs a vectorcode command asynchronously. 10 | --- Executes the command specified by `args`. Upon completion, if `callback` is provided, 11 | --- it's invoked with the following arguments: 12 | --- - `result`: the JSON object of the command execution result. 13 | --- - `error`: error messages, if any. 14 | --- - `code`: exit code (or error code) for the process. 15 | --- - `signal`: _for cmd runner only_, the shell signal sent to the process.
16 | --- The `bufnr` is used for context, potentially to find the project root or attach LSP clients. 17 | --- Returns a job handle (e.g., PID or LSP request ID) or nil if the job couldn't be started. 18 | ---@field run_async fun(args: string[], callback:VectorCode.JobRunner.Callback?, bufnr: integer):(job_handle:integer?) 19 | --- Runs a vectorcode command synchronously, blocking until completion or timeout. 20 | --- Executes the command specified by `args`. Waits for up to `timeout_ms` milliseconds. 21 | --- The `bufnr` is used for context, potentially to find the project root or attach LSP clients. 22 | --- Returns the following objects: 23 | --- - `result`: the JSON object of the command execution result. 24 | --- - `error`: error messages, if any. 25 | --- - `code`: exit code (or error code) for the process. 26 | --- - `signal`: _for cmd runner only_, the shell signal sent to the process. 27 | ---@field run fun(args: string[], timeout_ms: integer?, bufnr: integer):(result:table|nil, error:table|nil, code:integer, signal: integer?) 28 | --- Checks if a job associated with the given handle is currently running. 29 | --- Returns true if the job is running, false otherwise. 30 | ---@field is_job_running fun(job_handle: integer):boolean 31 | --- Attempts to stop or cancel the job associated with the given handle. 32 | ---@field stop_job fun(job_handle: integer) 33 | --- Optional initialization function. Some runners (like LSP) might require an initialization step. 34 | ---@field init function? 35 | 36 | return { 37 | --- Automatically find project_root from buffer path if it's not already specified. 38 | ---@param args string[] 39 | ---@param bufnr integer 40 | ---@return string[] 41 | find_root = function(args, bufnr) 42 | if not vim.list_contains(args, "--project_root") then 43 | local find_root = utils.find_root(bufnr) 44 | if find_root then 45 | vim.list_extend(args, { "--project_root", find_root }) 46 | end 47 | end 48 | return args 49 | end, 50 | } 51 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/prompt.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from vectorcode.cli_utils import Config, PromptCategory 5 | 6 | logger = logging.getLogger(name=__name__) 7 | 8 | prompt_by_categories: dict[str | PromptCategory, list[str]] = { 9 | PromptCategory.query: [ 10 | "separate phrases into distinct keywords when appropriate", 11 | "If a class, type or function has been imported from another file, this tool may be able to find its source. 
Add the name of the imported symbol to the query", 12 | "When providing answers based on VectorCode results, try to give references such as paths to files and line ranges, unless you're told otherwise (but do not include the full source code context)", 13 | "Avoid retrieving one single file because the retrieval mechanism may not be very accurate", 14 | "If the query results do not contain the needed context, increase the file count so that the result will more likely contain the desired files", 15 | "If the returned paths are relative, they are relative to the root of the project directory", 16 | "Do not suggest edits to retrieved files that are outside of the current working directory, unless the user instructed otherwise", 17 | "When specifying the `project_root` parameter when making a query, make sure you run the `ls` tool first to retrieve a list of valid, indexed projects", 18 | "If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings", 19 | "Do not use exact query keywords that you have used in a previous tool call in the conversation, unless the user instructed otherwise, or with different count/project_root", 20 | "Include related keywords as the search query. For example, when querying for `function`, include `return value`, `parameter`, `arguments` and alike.", 21 | ], 22 | PromptCategory.ls: [ 23 | "Use `ls` tool to obtain a list of indexed projects that are available to be queried by the `query` command." 24 | ], 25 | PromptCategory.vectorise: [ 26 | "When vectorising the files, provide accurate and case-sensitive paths to the file" 27 | ], 28 | "general": [ 29 | "VectorCode is the name of this tool. Do not include it in the query unless the user explicitly asks", 30 | "**Use at your discretion** when you feel you don't have enough information about the repository or project", 31 | "**Don't escape** special characters", 32 | ], 33 | } 34 | prompt_strings = [] 35 | 36 | 37 | def prompts(configs: Config) -> int: 38 | results = prompt_by_categories["general"].copy() 39 | for item in sorted(set(configs.prompt_categories or [PromptCategory.query])): 40 | logger.info(f"Loading {len(prompt_by_categories[item])} prompts for {item}") 41 | results.extend(prompt_by_categories[item]) 42 | results.sort() 43 | if configs.pipe: 44 | print(json.dumps(results)) 45 | else: 46 | for i in results: 47 | print(f"- {i}") 48 | return 0 49 | -------------------------------------------------------------------------------- /tests/subcommands/test_clean.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api import AsyncClientAPI 5 | 6 | from vectorcode.cli_utils import Config 7 | from vectorcode.subcommands.clean import clean, run_clean_on_client 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_run_clean_on_client(): 12 | mock_client = AsyncMock(spec=AsyncClientAPI) 13 | mock_collection1 = AsyncMock() 14 | mock_collection1.name = "test_collection_1" 15 | mock_collection1.metadata = {"path": "/test/path1"} 16 | mock_collection1.count.return_value = 0 # Empty collection 17 | mock_collection2 = AsyncMock() 18 | mock_collection2.name = "test_collection_2" 19 | mock_collection2.metadata = {"path": "/test/path2"} 20 | mock_collection2.count.return_value = 1 # Non-empty collection 21 | 22 | async def mock_get_collections(client): 23 | yield mock_collection1 24 | yield mock_collection2 
25 | 26 | with ( 27 | patch("vectorcode.subcommands.clean.get_collections", new=mock_get_collections), 28 | patch("os.path.isdir", return_value=lambda x: x == "/test/path2"), 29 | ): 30 | await run_clean_on_client(mock_client, pipe_mode=False) 31 | 32 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 33 | 34 | 35 | @pytest.mark.asyncio 36 | async def test_run_clean_on_client_pipe_mode(): 37 | mock_client = AsyncMock(spec=AsyncClientAPI) 38 | mock_collection1 = AsyncMock() 39 | mock_collection1.name = "test_collection_1" 40 | mock_collection1.metadata = {"path": "/test/path1"} 41 | mock_collection1.count.return_value = 0 # Empty collection 42 | 43 | async def mock_get_collections(client): 44 | yield mock_collection1 45 | 46 | with patch( 47 | "vectorcode.subcommands.clean.get_collections", new=mock_get_collections 48 | ): 49 | await run_clean_on_client(mock_client, pipe_mode=True) 50 | 51 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 52 | 53 | 54 | @pytest.mark.asyncio 55 | async def test_run_clean_on_removed_dir(): 56 | mock_client = AsyncMock(spec=AsyncClientAPI) 57 | mock_collection1 = AsyncMock() 58 | mock_collection1.name = "test_collection_1" 59 | mock_collection1.metadata = {"path": "/test/path1"} 60 | mock_collection1.count.return_value = 10 61 | 62 | async def mock_get_collections(client): 63 | yield mock_collection1 64 | 65 | with ( 66 | patch("vectorcode.subcommands.clean.get_collections", new=mock_get_collections), 67 | patch("os.path.isdir", return_value=False), 68 | ): 69 | await run_clean_on_client(mock_client, pipe_mode=True) 70 | 71 | mock_client.delete_collection.assert_called_once_with(mock_collection1.name) 72 | 73 | 74 | @pytest.mark.asyncio 75 | async def test_clean(): 76 | AsyncMock(spec=AsyncClientAPI) 77 | mock_config = Config(pipe=False) 78 | 79 | with patch("vectorcode.subcommands.clean.ClientManager"): 80 | result = await clean(mock_config) 81 | 82 | assert result == 0 83 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/ls_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local vc_config = require("vectorcode.config") 4 | local utils = require("vectorcode.utils") 5 | local logger = vc_config.logger 6 | 7 | ---@type VectorCode.CodeCompanion.LsToolOpts 8 | local default_ls_options = { 9 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 10 | } 11 | 12 | ---@param opts VectorCode.CodeCompanion.LsToolOpts|{}|nil 13 | ---@return VectorCode.CodeCompanion.LsToolOpts 14 | local get_ls_tool_opts = function(opts) 15 | opts = vim.tbl_deep_extend("force", default_ls_options, opts or {}) 16 | logger.info( 17 | string.format( 18 | "Loading `vectorcode_ls` with the following opts:\n%s", 19 | vim.inspect(opts) 20 | ) 21 | ) 22 | return opts 23 | end 24 | 25 | ---@param opts VectorCode.CodeCompanion.LsToolOpts 26 | ---@return CodeCompanion.Tools.Tool 27 | return function(opts) 28 | opts = get_ls_tool_opts(opts) 29 | local job_runner = 30 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 31 | opts.use_lsp 32 | ) 33 | local tool_name = "vectorcode_ls" 34 | ---@type CodeCompanion.Tools.Tool|{} 35 | return { 36 | name = tool_name, 37 | cmds = { 38 | ---@param tools CodeCompanion.Tools 39 | ---@return nil|{ status: string, data: string } 40 | function(tools, _, _, cb) 41 | job_runner.run_async({ "ls", "--pipe" }, function(result, 
error) 42 | if vim.islist(result) and #result > 0 then 43 | cb({ status = "success", data = result }) 44 | else 45 | if type(error) == "table" then 46 | error = utils.flatten_table_to_string(error, "Unknown error.") 47 | end 48 | cb({ 49 | status = "error", 50 | data = error, 51 | }) 52 | end 53 | end, tools.chat.bufnr) 54 | end, 55 | }, 56 | schema = { 57 | type = "function", 58 | ["function"] = { 59 | name = tool_name, 60 | description = [[ 61 | Retrieve a list of projects accessible via the VectorCode tools. 62 | Where relevant, use paths from this tool as the `project_root` parameter in other vectorcode tools. 63 | ]], 64 | parameters = { 65 | -- make anthropic models happy. 66 | type = "object", 67 | properties = vim.empty_dict(), 68 | required = {}, 69 | additionalProperties = false, 70 | }, 71 | }, 72 | }, 73 | output = { 74 | ---@param tools CodeCompanion.Tools 75 | ---@param stdout VectorCode.LsResult[][] 76 | success = function(_, tools, _, stdout) 77 | stdout = stdout[#stdout] 78 | local user_message 79 | for i, col in ipairs(stdout) do 80 | if i == 1 then 81 | user_message = 82 | string.format("**VectorCode `ls` Tool**: Found %d collections.", #stdout) 83 | else 84 | user_message = "" 85 | end 86 | tools.chat:add_tool_output( 87 | tools.tool, 88 | string.format("%s", col["project-root"]), 89 | user_message 90 | ) 91 | end 92 | end, 93 | }, 94 | } 95 | end 96 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/reranker/base.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import logging 3 | from abc import ABC, abstractmethod 4 | from typing import Any 5 | 6 | import numpy 7 | 8 | from vectorcode.chunking import Chunk 9 | from vectorcode.cli_utils import Config, QueryInclude 10 | from vectorcode.subcommands.query.types import QueryResult 11 | 12 | logger = logging.getLogger(name=__name__) 13 | 14 | 15 | class RerankerBase(ABC): 16 | """This is the base class for the rerankers. 17 | You should use the configs.reranker_params field to store and pass the parameters used for your reranker. 18 | You should implement the `compute_similarity` method, which will be called by `rerank` to compute 19 | similarity scores between search query and results. 20 | The items in the returned list should be sorted such that the relevance decreases along the list. 21 | 22 | The class doc string will be added to the error message if your reranker fails to initialise. 23 | Thus, this is a good place to put the instructions to configuring your reranker. 24 | """ 25 | 26 | def __init__(self, configs: Config, **kwargs: Any): 27 | self.configs = configs 28 | assert self.configs.query is not None, ( 29 | "'configs' should contain the query messages." 30 | ) 31 | self.n_result = configs.n_result 32 | self._raw_results: list[QueryResult] = [] 33 | 34 | @classmethod 35 | def create(cls, configs: Config, **kwargs: Any): 36 | try: 37 | return cls(configs, **kwargs) 38 | except Exception as e: 39 | e.add_note( 40 | "\n" 41 | + ( 42 | cls.__doc__ 43 | or f"There was an issue initialising {cls}. Please doublecheck your configuration." 44 | ) 45 | ) 46 | raise 47 | 48 | @abstractmethod 49 | async def compute_similarity( 50 | self, results: list[QueryResult] 51 | ) -> None: # pragma: nocover 52 | """ 53 | Modify the `QueryResult.scores` field **IN-PLACE** so that they contain the correct scores. 
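A minimal sketch of a subclass implementation (illustrative only; `difflib` below is just a stand-in for a real scoring model, not what the bundled rerankers use):

    async def compute_similarity(self, results: list[QueryResult]) -> None:
        import difflib

        for res in results:
            # one score per query string, in the same order as `res.query`
            res.scores = tuple(
                difflib.SequenceMatcher(None, q, res.chunk.text).ratio()
                for q in res.query
            )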
54 | """ 55 | raise NotImplementedError 56 | 57 | async def rerank(self, results: list[QueryResult]) -> list[str | Chunk]: 58 | if len(results) == 0: 59 | return [] 60 | 61 | # compute the similarity scores 62 | await self.compute_similarity(results) 63 | 64 | # group the results by the query type: file (path) or chunk 65 | # and only keep the `top_k` results for each group 66 | group_by = "path" 67 | if QueryInclude.chunk in self.configs.include: 68 | group_by = "chunk" 69 | grouped_results = QueryResult.group(*results, by=group_by, top_k="auto") 70 | 71 | # compute the mean scores for each of the groups 72 | scores: dict[Chunk | str, float] = {} 73 | for key in grouped_results.keys(): 74 | scores[key] = float( 75 | numpy.mean(tuple(i.mean_score() for i in grouped_results[key])) 76 | ) 77 | 78 | return list( 79 | i 80 | for i in heapq.nlargest( 81 | self.configs.n_result, grouped_results.keys(), key=lambda x: scores[x] 82 | ) 83 | ) 84 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/types.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | from collections import defaultdict 3 | from dataclasses import dataclass 4 | from typing import Literal, Union 5 | 6 | import numpy 7 | 8 | from vectorcode.chunking import Chunk 9 | 10 | 11 | @dataclass 12 | class QueryResult: 13 | """ 14 | The container for one single query result. 15 | 16 | args: 17 | - path: path to the file 18 | - content: `vectorcode.chunking.Chunk` object that stores the chunk 19 | - query: query messages used for the search 20 | - scores: similarity scores for the corresponding query. 21 | """ 22 | 23 | path: str 24 | chunk: Chunk 25 | query: tuple[str, ...] 26 | scores: tuple[float, ...] 27 | 28 | @classmethod 29 | def merge(cls, *results: "QueryResult") -> "QueryResult": 30 | """ 31 | Given the results of a single chunk/document from different queries, merge them into a single `QueryResult` object. 32 | """ 33 | for i in range(len(results) - 1): 34 | if (i < len(results) - 1) and not results[i].is_same_doc(results[i + 1]): 35 | raise ValueError( 36 | f"The inputs are not the same chunk: {results[i]}, {results[i + 1]}" 37 | ) 38 | 39 | return QueryResult( 40 | path=results[0].path, 41 | chunk=results[0].chunk, 42 | query=sum((tuple(i.query) for i in results), start=tuple()), 43 | scores=sum((tuple(i.scores) for i in results), start=tuple()), 44 | ) 45 | 46 | @staticmethod 47 | def group( 48 | *results: "QueryResult", 49 | by: Union[Literal["path"], Literal["chunk"]] = "path", 50 | top_k: int | Literal["auto"] | None = None, 51 | ) -> dict[Chunk | str, list["QueryResult"]]: 52 | """ 53 | Group the query results based on `key`. 54 | 55 | args: 56 | - `by`: either "path" or "chunk" 57 | - `top_k`: if set, only return the top k results for each group based on mean scores. If "auto", top k is decided by the mean number of results per group. 58 | 59 | returns: 60 | - a dictionary that maps either path or chunk to a list of `QueryResult` object. 
61 | 62 | """ 63 | assert by in {"path", "chunk"} 64 | grouped_result: dict[Chunk | str, list["QueryResult"]] = defaultdict(list) 65 | 66 | for res in results: 67 | grouped_result[getattr(res, by)].append(res) 68 | 69 | if top_k == "auto": 70 | top_k = int(numpy.mean(tuple(len(i) for i in grouped_result.values()))) 71 | 72 | if top_k and top_k > 0: 73 | for group in grouped_result.keys(): 74 | grouped_result[group] = heapq.nlargest(top_k, grouped_result[group]) 75 | return grouped_result 76 | 77 | def mean_score(self): 78 | return float(numpy.mean(self.scores)) 79 | 80 | def __lt__(self, other: "QueryResult"): 81 | assert isinstance(other, QueryResult) 82 | return self.mean_score() < other.mean_score() 83 | 84 | def __gt__(self, other: "QueryResult"): 85 | assert isinstance(other, QueryResult) 86 | return self.mean_score() > other.mean_score() 87 | 88 | def __eq__(self, other: object, /) -> bool: 89 | return ( 90 | isinstance(other, QueryResult) and self.mean_score() == other.mean_score() 91 | ) 92 | 93 | def is_same_doc(self, other: "QueryResult") -> bool: 94 | return self.path == other.path and self.chunk == other.chunk 95 | -------------------------------------------------------------------------------- /tests/subcommands/test_chunks.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import MagicMock, patch 3 | 4 | import pytest 5 | from tree_sitter import Point 6 | 7 | from vectorcode.chunking import Chunk, TreeSitterChunker 8 | from vectorcode.cli_utils import Config 9 | from vectorcode.subcommands import chunks 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_chunks(): 14 | # Mock the Config object 15 | mock_config = MagicMock(spec=Config) 16 | mock_config.chunk_size = 2000 17 | mock_config.overlap_ratio = 0.2 18 | mock_config.files = ["file1.py", "file2.py"] 19 | 20 | # Mock the TreeSitterChunker 21 | mock_chunker = TreeSitterChunker(mock_config) 22 | mock_chunker.chunk = MagicMock() 23 | mock_chunker.chunk.side_effect = [ 24 | [Chunk("chunk1_file1", None, None), Chunk("chunk2_file1", None, None)], 25 | [ 26 | Chunk("chunk1_file2", Point(1, 0), Point(1, 11)), 27 | Chunk("chunk2_file2", Point(1, 0), Point(1, 11)), 28 | ], 29 | ] 30 | with patch( 31 | "vectorcode.subcommands.chunks.TreeSitterChunker", return_value=mock_chunker 32 | ): 33 | # Call the chunks function 34 | result = await chunks(mock_config) 35 | 36 | # Assertions 37 | assert result == 0 38 | assert mock_chunker.config == mock_config 39 | mock_chunker.chunk.assert_called() 40 | assert mock_chunker.chunk.call_count == 2 41 | 42 | 43 | @pytest.mark.asyncio 44 | async def test_chunks_pipe(capsys): 45 | # Mock the Config object 46 | mock_config = MagicMock(spec=Config) 47 | mock_config.chunk_size = 2000 48 | mock_config.overlap_ratio = 0.2 49 | mock_config.files = ["file1.py"] 50 | mock_config.pipe = True 51 | 52 | # Mock the TreeSitterChunker 53 | mock_chunker = TreeSitterChunker(mock_config) 54 | mock_chunker.chunk = MagicMock() 55 | _chunks = [ 56 | Chunk("chunk1_file1", Point(0, 1), Point(0, 12), path="file1.py", id="c1"), 57 | Chunk("chunk2_file1", Point(1, 1), Point(1, 12), path="file1.py", id="c2"), 58 | ] 59 | mock_chunker.chunk.side_effect = [ 60 | _chunks, 61 | ] 62 | with patch( 63 | "vectorcode.subcommands.chunks.TreeSitterChunker", return_value=mock_chunker 64 | ): 65 | # Call the chunks function 66 | result = await chunks(mock_config) 67 | 68 | # Assertions 69 | assert result == 0 70 | assert mock_chunker.config == mock_config 71 | 
mock_chunker.chunk.assert_called() 72 | assert mock_chunker.chunk.call_count == 1 73 | 74 | captured = capsys.readouterr() 75 | output = json.loads(captured.out.strip()) 76 | assert output == [ 77 | [ 78 | { 79 | "text": "chunk1_file1", 80 | "start": { 81 | "row": 0, 82 | "column": 1, 83 | }, 84 | "end": {"row": 0, "column": 12}, 85 | "path": "file1.py", 86 | "chunk_id": "c1", 87 | }, 88 | { 89 | "text": "chunk2_file1", 90 | "start": { 91 | "row": 1, 92 | "column": 1, 93 | }, 94 | "end": {"row": 1, "column": 12}, 95 | "path": "file1.py", 96 | "chunk_id": "c2", 97 | }, 98 | ] 99 | ] 100 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/update.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | from asyncio import Lock 6 | 7 | import tqdm 8 | from chromadb.api.types import IncludeEnum 9 | from chromadb.errors import InvalidCollectionException 10 | 11 | from vectorcode.cli_utils import Config 12 | from vectorcode.common import ClientManager, get_collection, verify_ef 13 | from vectorcode.subcommands.vectorise import VectoriseStats, chunked_add, show_stats 14 | 15 | logger = logging.getLogger(name=__name__) 16 | 17 | 18 | async def update(configs: Config) -> int: 19 | async with ClientManager().get_client(configs) as client: 20 | try: 21 | collection = await get_collection(client, configs, False) 22 | except IndexError as e: 23 | print( 24 | f"{e.__class__.__name__}: Failed to get/create the collection. Please check your config." 25 | ) 26 | return 1 27 | except (ValueError, InvalidCollectionException) as e: 28 | print( 29 | f"{e.__class__.__name__}: There's no existing collection for {configs.project_root}", 30 | file=sys.stderr, 31 | ) 32 | return 1 33 | if collection is None: # pragma: nocover 34 | logger.error( 35 | f"Failed to find a collection at {configs.project_root} from {configs.db_url}" 36 | ) 37 | return 1 38 | if not verify_ef(collection, configs): # pragma: nocover 39 | return 1 40 | 41 | metas = (await collection.get(include=[IncludeEnum.metadatas]))["metadatas"] 42 | if metas is None or len(metas) == 0: # pragma: nocover 43 | logger.debug("Empty collection.") 44 | return 0 45 | 46 | files_gen = (str(meta.get("path", "")) for meta in metas) 47 | files = set() 48 | orphanes = set() 49 | for file in files_gen: 50 | if os.path.isfile(file): 51 | files.add(file) 52 | else: 53 | orphanes.add(file) 54 | 55 | stats = VectoriseStats(removed=len(orphanes)) 56 | collection_lock = Lock() 57 | stats_lock = Lock() 58 | max_batch_size = await client.get_max_batch_size() 59 | semaphore = asyncio.Semaphore(os.cpu_count() or 1) 60 | 61 | with tqdm.tqdm( 62 | total=len(files), desc="Vectorising files...", disable=configs.pipe 63 | ) as bar: 64 | logger.info(f"Updating embeddings for {len(files)} file(s).") 65 | try: 66 | tasks = [ 67 | asyncio.create_task( 68 | chunked_add( 69 | str(file), 70 | collection, 71 | collection_lock, 72 | stats, 73 | stats_lock, 74 | configs, 75 | max_batch_size, 76 | semaphore, 77 | ) 78 | ) 79 | for file in files 80 | ] 81 | for task in asyncio.as_completed(tasks): 82 | await task 83 | bar.update(1) 84 | except asyncio.CancelledError: # pragma: nocover 85 | print("Abort.", file=sys.stderr) 86 | return 1 87 | 88 | if len(orphanes): 89 | logger.info(f"Removing {len(orphanes)} orphaned files from database.") 90 | await collection.delete(where={"path": {"$in": list(orphanes)}}) 91 | 92 | show_stats(configs, 
stats) 93 | return 0 94 | -------------------------------------------------------------------------------- /tests/subcommands/files/test_files_ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import AsyncMock, patch 3 | 4 | import pytest 5 | from chromadb.api.models.AsyncCollection import AsyncCollection 6 | 7 | from vectorcode.cli_utils import CliAction, Config, FilesAction 8 | from vectorcode.subcommands.files.ls import ls 9 | 10 | 11 | @pytest.fixture 12 | def client(): 13 | return AsyncMock() 14 | 15 | 16 | @pytest.fixture 17 | def collection(): 18 | col = AsyncMock(spec=AsyncCollection) 19 | col.get.return_value = { 20 | "ids": ["id1", "id2", "id3"], 21 | "distances": [0.1, 0.2, 0.3], 22 | "metadatas": [ 23 | {"path": "file1.py", "start": 1, "end": 1}, 24 | {"path": "file2.py", "start": 1, "end": 1}, 25 | {"path": "file3.py", "start": 1, "end": 1}, 26 | ], 27 | "documents": [ 28 | "content1", 29 | "content2", 30 | "content3", 31 | ], 32 | } 33 | return col 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_ls(client, collection, capsys): 38 | with ( 39 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 40 | patch( 41 | "vectorcode.subcommands.files.ls.get_collection", return_value=collection 42 | ), 43 | patch("vectorcode.common.try_server", return_value=True), 44 | ): 45 | MockClientManager.return_value._create_client.return_value = client 46 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls)) 47 | out = capsys.readouterr().out 48 | assert "file1.py" in out 49 | assert "file2.py" in out 50 | assert "file3.py" in out 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_ls_piped(client, collection, capsys): 55 | with ( 56 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 57 | patch( 58 | "vectorcode.subcommands.files.ls.get_collection", return_value=collection 59 | ), 60 | patch("vectorcode.common.try_server", return_value=True), 61 | ): 62 | MockClientManager.return_value._create_client.return_value = client 63 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True)) 64 | out = capsys.readouterr().out 65 | assert json.dumps(["file1.py", "file2.py", "file3.py"]).strip() == out.strip() 66 | 67 | 68 | @pytest.mark.asyncio 69 | async def test_ls_no_collection(client, collection, capsys): 70 | with ( 71 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 72 | patch("vectorcode.subcommands.files.ls.get_collection", side_effect=ValueError), 73 | ): 74 | MockClientManager.return_value._create_client.return_value = client 75 | assert ( 76 | await ls( 77 | Config(action=CliAction.files, files_action=FilesAction.ls, pipe=True) 78 | ) 79 | != 0 80 | ) 81 | 82 | 83 | @pytest.mark.asyncio 84 | async def test_ls_empty_collection(client, capsys): 85 | mock_collection = AsyncMock(spec=AsyncCollection) 86 | mock_collection.get.return_value = {} 87 | with ( 88 | patch("vectorcode.subcommands.files.ls.ClientManager") as MockClientManager, 89 | patch( 90 | "vectorcode.subcommands.files.ls.get_collection", 91 | return_value=mock_collection, 92 | ), 93 | patch("vectorcode.common.try_server", return_value=True), 94 | ): 95 | MockClientManager.return_value._create_client.return_value = client 96 | assert ( 97 | await ls(Config(action=CliAction.files, files_action=FilesAction.ls)) == 0 98 | ) 99 | -------------------------------------------------------------------------------- 
/tests/subcommands/files/test_files_rm.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api.models.AsyncCollection import AsyncCollection 5 | 6 | from vectorcode.cli_utils import CliAction, Config, FilesAction 7 | from vectorcode.subcommands.files.rm import rm 8 | 9 | 10 | @pytest.fixture 11 | def client(): 12 | return AsyncMock() 13 | 14 | 15 | @pytest.fixture 16 | def collection(): 17 | col = AsyncMock(spec=AsyncCollection) 18 | col.get.return_value = { 19 | "ids": ["id1", "id2", "id3"], 20 | "distances": [0.1, 0.2, 0.3], 21 | "metadatas": [ 22 | {"path": "file1.py", "start": 1, "end": 1}, 23 | {"path": "file2.py", "start": 1, "end": 1}, 24 | {"path": "file3.py", "start": 1, "end": 1}, 25 | ], 26 | "documents": [ 27 | "content1", 28 | "content2", 29 | "content3", 30 | ], 31 | } 32 | col.name = "test_collection" 33 | return col 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_rm(client, collection, capsys): 38 | with ( 39 | patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, 40 | patch( 41 | "vectorcode.subcommands.files.rm.get_collection", return_value=collection 42 | ), 43 | patch("vectorcode.common.try_server", return_value=True), 44 | patch("os.path.isfile", return_value=True), 45 | patch( 46 | "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x 47 | ), 48 | ): 49 | MockClientManager.return_value._create_client.return_value = client 50 | config = Config( 51 | action=CliAction.files, 52 | files_action=FilesAction.rm, 53 | rm_paths=["file1.py"], 54 | ) 55 | await rm(config) 56 | collection.delete.assert_called_with(where={"path": {"$in": ["file1.py"]}}) 57 | 58 | 59 | @pytest.mark.asyncio 60 | async def test_rm_empty_collection(client, collection, capsys): 61 | with ( 62 | patch( 63 | "vectorcode.subcommands.files.rm.get_collection", return_value=collection 64 | ), 65 | patch("vectorcode.common.try_server", return_value=True), 66 | patch("os.path.isfile", return_value=True), 67 | patch( 68 | "vectorcode.subcommands.files.rm.expand_path", side_effect=lambda x, y: x 69 | ), 70 | patch( 71 | "vectorcode.subcommands.files.rm.ClientManager._create_client", 72 | return_value=client, 73 | ), 74 | ): 75 | config = Config( 76 | action=CliAction.files, 77 | files_action=FilesAction.rm, 78 | rm_paths=["file1.py"], 79 | ) 80 | collection.count = AsyncMock(return_value=0) 81 | client.delete_collection = AsyncMock() 82 | await rm(config) 83 | client.delete_collection.assert_called_once_with(collection.name) 84 | 85 | 86 | @pytest.mark.asyncio 87 | async def test_rm_no_collection(client, collection, capsys): 88 | with ( 89 | patch("vectorcode.subcommands.files.rm.ClientManager") as MockClientManager, 90 | patch("vectorcode.subcommands.files.rm.get_collection", side_effect=ValueError), 91 | ): 92 | MockClientManager.return_value._create_client.return_value = client 93 | assert ( 94 | await rm( 95 | Config( 96 | action=CliAction.files, 97 | files_action=FilesAction.rm, 98 | pipe=True, 99 | rm_paths=["file1.py"], 100 | ) 101 | ) 102 | != 0 103 | ) 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | 
dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm-project.org/#use-with-ide 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | # VectorCode 165 | src/vectorcode/_version.py 166 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/files_ls_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | 7 | local default_opts = { 8 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 9 | } 10 | 11 | ---@param opts VectorCode.CodeCompanion.FilesLsToolOpts 12 | ---@return CodeCompanion.Tools.Tool 13 | return function(opts) 14 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) 15 | local job_runner = 16 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 17 | opts.use_lsp 18 | ) 19 | local tool_name = "vectorcode_files_ls" 20 | ---@type CodeCompanion.Tools.Tool|{} 21 | return { 22 | name = tool_name, 23 | cmds = { 24 | ---@param tools CodeCompanion.Tools 25 | ---@param action {project_root: string} 26 | ---@return nil|{ status: string, data: string } 27 | function(tools, action, _, cb) 28 | local args = { "files", "ls", "--pipe" } 29 | action = utils.fix_nil(action) 30 | if action ~= nil then 31 | action.project_root = action.project_root 32 | or vim.fs.root(0, { ".vectorcode", ".git" }) 33 | if action.project_root ~= nil then 34 | action.project_root = vim.fs.normalize(action.project_root) 35 | if utils.is_directory(action.project_root) then 36 | vim.list_extend(args, { "--project_root", action.project_root }) 37 | end 38 | end 39 | end 40 | job_runner.run_async(args, function(result, error) 41 | if vim.islist(result) and #result > 0 then 42 | cb({ status = "success", data = result }) 43 | else 44 | if type(error) == "table" then 45 | error = utils.flatten_table_to_string(error, "Unknown error.") 46 | end 47 | cb({ 48 | status = "error", 49 | data = error, 50 | }) 51 | end 52 | end, tools.chat.bufnr) 53 | end, 54 | }, 55 | schema = { 56 | type = "function", 57 | ["function"] = { 58 | name = tool_name, 59 | description = [[ 60 | Retrieve a list of files that have been added to the database for a given project. 
61 | **ABSOLUTE PATHS** in the results indicate that the files are OUTSIDE of the current working directories and you can **ONLY** access them via the VectorCode tools. 62 | **RELATIVE PATHS** in the results indicate that the files are INSIDE the current project. You can use VectorCode tools or any other tools that the user provided to interact with them. They are relative to the project root. 63 | ]], 64 | parameters = { 65 | type = "object", 66 | properties = { 67 | project_root = { 68 | type = "string", 69 | description = [[ 70 | The project that the files belong to. 71 | The value should be one of the following: 72 | - One of the paths from the `vectorcode_ls` tool; 73 | - User input; 74 | - `null` (omit this parameter), which means the current project, if found. 75 | ]], 76 | }, 77 | }, 78 | }, 79 | }, 80 | }, 81 | output = { 82 | ---@param tools CodeCompanion.Tools 83 | ---@param stdout string[][] 84 | success = function(_, tools, _, stdout) 85 | stdout = stdout[#stdout] 86 | local user_message 87 | for i, col in ipairs(stdout) do 88 | if i == 1 then 89 | user_message = 90 | string.format("**VectorCode `files_ls` Tool**: Found %d files.", #stdout) 91 | else 92 | user_message = "" 93 | end 94 | tools.chat:add_tool_output( 95 | tools.tool, 96 | string.format("%s", cc_common.cleanup_path(col)), 97 | user_message 98 | ) 99 | end 100 | end, 101 | }, 102 | } 103 | end 104 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/files_rm_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | 7 | local default_opts = { 8 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 9 | } 10 | 11 | ---@alias FilesRmArgs { paths: string[], project_root: string? } 12 | 13 | ---@param opts VectorCode.CodeCompanion.FilesRmToolOpts 14 | ---@return CodeCompanion.Tools 15 | return function(opts) 16 | opts = vim.tbl_deep_extend("force", default_opts, opts or {}) 17 | 18 | local tool_name = "vectorcode_files_rm" 19 | local job_runner = cc_common.initialise_runner(opts.use_lsp) 20 | 21 | ---@type CodeCompanion.Tools|{} 22 | return { 23 | name = tool_name, 24 | schema = { 25 | type = "function", 26 | ["function"] = { 27 | name = tool_name, 28 | description = "Remove files from the VectorCode database. The files will remain in the file system.", 29 | parameters = { 30 | type = "object", 31 | properties = { 32 | paths = { 33 | type = "array", 34 | items = { type = "string" }, 35 | description = "Paths to the files to be removed from the database.", 36 | }, 37 | project_root = { 38 | type = "string", 39 | description = [[ 40 | The project that the files belong to. 41 | The value should be one of the following: 42 | - One of the paths from the `vectorcode_ls` tool; 43 | - User input; 44 | - `null` (omit this parameter), which means the current project, if found. 
45 | ]], 46 | }, 47 | }, 48 | required = { "paths" }, 49 | }, 50 | strict = true, 51 | }, 52 | }, 53 | cmds = { 54 | ---@param tools CodeCompanion.Tools 55 | ---@param action VectoriseToolArgs 56 | ---@return nil|{ status: string, data: string } 57 | function(tools, action, _, cb) 58 | local args = { "files", "rm", "--pipe" } 59 | action = utils.fix_nil(action) 60 | if action.project_root then 61 | local project_root = vim.fs.abspath(vim.fs.normalize(action.project_root)) 62 | if utils.is_directory(project_root) then 63 | vim.list_extend(args, { "--project_root", project_root }) 64 | else 65 | return { status = "error", data = "Invalid path " .. project_root } 66 | end 67 | end 68 | if action.paths == nil or #action.paths == 0 then 69 | return { status = "error", data = "Please specify at least one path." } 70 | end 71 | vim.list_extend( 72 | args, 73 | vim 74 | .iter(action.paths) 75 | :filter( 76 | ---@param item string 77 | function(item) 78 | return utils.is_file(item) 79 | end 80 | ) 81 | :totable() 82 | ) 83 | job_runner.run_async( 84 | args, 85 | ---@param result VectoriseResult 86 | function(result, error, code, _) 87 | if code == 0 then 88 | cb({ status = "success", data = result }) 89 | else 90 | cb({ status = "error", data = { error = error, code = code } }) 91 | end 92 | end, 93 | tools.chat.bufnr 94 | ) 95 | end, 96 | }, 97 | output = { 98 | ---@param self CodeCompanion.Tools.Tool 99 | prompt = function(self, _) 100 | return string.format( 101 | "Remove %d files from VectorCode database?", 102 | #self.args.paths 103 | ) 104 | end, 105 | ---@param self CodeCompanion.Tools.Tool 106 | ---@param tools CodeCompanion.Tools 107 | success = function(self, tools, _, _) 108 | tools.chat:add_tool_output(self, "**VectorCode `files_rm` tool**: successful.") 109 | end, 110 | }, 111 | } 112 | end 113 | -------------------------------------------------------------------------------- /.github/workflows/panvimdoc.yml: -------------------------------------------------------------------------------- 1 | name: panvimdoc 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'main' 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | docs: 13 | runs-on: ubuntu-latest 14 | name: pandoc to vimdoc 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: panvimdoc 19 | uses: kdheepak/panvimdoc@main 20 | with: 21 | vimdoc: "VectorCode" # Output vimdoc project name (required) 22 | pandoc: "./docs/neovim/README.md" # Input pandoc file 23 | toc: true # Table of contents 24 | description: "A code repository indexing tool to supercharge your LLM experience." 
# Project description used in title (if empty, uses neovim version and current date) 25 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 26 | demojify: true # Strip emojis from the vimdoc 27 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 28 | treesitter: true # Use treesitter for highlighting codeblocks 29 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 30 | docmapping: false # Use h4 headers as mapping docs 31 | docmappingprojectname: true # Use project name in tag when writing mapping docs 32 | shiftheadinglevelby: 0 # Shift heading levels by specified number 33 | incrementheadinglevelby: 0 # Increment heading levels by specified number 34 | 35 | - name: panvimdoc 36 | uses: kdheepak/panvimdoc@main 37 | with: 38 | vimdoc: "VectorCode-API" # Output vimdoc project name (required) 39 | pandoc: "./docs/neovim/api_references.md" # Input pandoc file 40 | toc: true # Table of contents 41 | description: "A code repository indexing tool to supercharge your LLM experience." # Project description used in title (if empty, uses neovim version and current date) 42 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 43 | demojify: true # Strip emojis from the vimdoc 44 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 45 | treesitter: true # Use treesitter for highlighting codeblocks 46 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 47 | docmapping: false # Use h4 headers as mapping docs 48 | docmappingprojectname: true # Use project name in tag when writing mapping docs 49 | shiftheadinglevelby: 0 # Shift heading levels by specified number 50 | incrementheadinglevelby: 0 # Increment heading levels by specified number 51 | 52 | - name: panvimdoc 53 | uses: kdheepak/panvimdoc@main 54 | with: 55 | vimdoc: "VectorCode-cli" # Output vimdoc project name (required) 56 | pandoc: "./docs/cli.md" # Input pandoc file 57 | toc: true # Table of contents 58 | description: "A code repository indexing tool to supercharge your LLM experience." # Project description used in title (if empty, uses neovim version and current date) 59 | titledatepattern: "%Y %B %d" # Pattern for the date that used in the title 60 | demojify: true # Strip emojis from the vimdoc 61 | dedupsubheadings: true # Add heading to subheading anchor links to ensure that subheadings are unique 62 | treesitter: true # Use treesitter for highlighting codeblocks 63 | ignorerawblocks: true # Ignore raw html blocks in markdown when converting to vimdoc 64 | docmapping: false # Use h4 headers as mapping docs 65 | docmappingprojectname: true # Use project name in tag when writing mapping docs 66 | shiftheadinglevelby: 0 # Shift heading levels by specified number 67 | incrementheadinglevelby: 0 # Increment heading levels by specified number 68 | 69 | - uses: stefanzweifel/git-auto-commit-action@v6.0.1 70 | with: 71 | commit_message: "Auto generate docs" 72 | branch: ${{ github.head_ref }} 73 | file_pattern: 'doc/*.txt' 74 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/copilotchat.lua: -------------------------------------------------------------------------------- 1 | ---@module "CopilotChat" 2 | 3 | ---@class VectorCode.CopilotChat.ContextOpts 4 | ---@field max_num number? 5 | ---@field use_lsp boolean? 
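--- A minimal wiring sketch (illustrative only): `make_context_provider`, `max_num` and `use_lsp` come from this file, while the `contexts` table and the `vectorcode` key are assumptions about a typical CopilotChat.nvim `setup()` call; adapt them to your own config.
---
---   require("CopilotChat").setup({
---     contexts = {
---       vectorcode = require("vectorcode.integrations").copilotchat.make_context_provider({
---         max_num = 10,
---         use_lsp = false,
---       }),
---     },
---   })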
6 | 7 | local async = require("plenary.async") 8 | local vc_config = require("vectorcode.config") 9 | local logger = vc_config.logger 10 | local notify_opts = vc_config.notify_opts 11 | local check_cli_wrap = vc_config.check_cli_wrap 12 | local job_runner = nil 13 | 14 | ---@param use_lsp boolean 15 | local function get_runner(use_lsp) 16 | if job_runner == nil then 17 | if use_lsp then 18 | job_runner = require("vectorcode.jobrunner.lsp") 19 | end 20 | if job_runner == nil then 21 | job_runner = require("vectorcode.jobrunner.cmd") 22 | if use_lsp then 23 | vim.schedule_wrap(vim.notify)( 24 | "Failed to initialise the LSP runner. Falling back to cmd runner.", 25 | vim.log.levels.WARN, 26 | notify_opts 27 | ) 28 | end 29 | end 30 | end 31 | return job_runner 32 | end 33 | 34 | ---@param args string[] 35 | ---@param use_lsp boolean 36 | ---@param bufnr integer 37 | ---@async 38 | local run_job = async.wrap(function(args, use_lsp, bufnr, callback) 39 | local runner = get_runner(use_lsp) 40 | assert(runner ~= nil, "Failed to initialize the runner!") 41 | runner.run_async(args, callback, bufnr) 42 | end, 4) 43 | 44 | ---@param opts VectorCode.CopilotChat.ContextOpts? 45 | ---@return CopilotChat.config.context 46 | local make_context_provider = check_cli_wrap(function(opts) 47 | opts = vim.tbl_deep_extend("force", { 48 | max_num = 5, 49 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 50 | }, opts or {}) 51 | logger.info("Creating CopilotChat context provider with the following opts:\n", opts) 52 | 53 | local utils = require("CopilotChat.utils") 54 | 55 | return { 56 | description = [[This gives you the ability to access the repository to find information that you may need to assist the user. Supports input (query). 57 | 58 | - **Use at your discretion** when you feel you don't have enough information about the repository or project. 59 | - **Don't escape** special characters. 60 | - If a class, type or function has been imported from another file, this context may be able to find its source. Add the name of the imported symbol to the query. 61 | - The embeddings are mostly generated from source code, so using keywords that may be present in source code may help with the retrieval. 62 | - Avoid retrieving one single file because the retrieval mechanism may not be very accurate. 63 | = If a query failed to retrieve desired results, a new attempt should use different keywords that are orthogonal to the previous ones but with similar meanings 64 | - Do not use exact query keywords that you have used in a previous context call in the conversation, unless the user instructed otherwise 65 | ]], 66 | 67 | input = function(callback) 68 | vim.ui.input({ 69 | prompt = "Enter query> ", 70 | }, callback) 71 | end, 72 | 73 | resolve = function(input, source, prompt) 74 | if not input or input == "" then 75 | input = prompt 76 | end 77 | 78 | local args = { 79 | "query", 80 | "--pipe", 81 | "-n", 82 | tostring(opts.max_num), 83 | '"' .. input .. 
'"', 84 | } 85 | 86 | local cwd = source.cwd() 87 | local try_root = vim.fs.root(cwd, ".vectorcode") or vim.fs.root(cwd, ".git") 88 | if try_root ~= nil then 89 | vim.list_extend(args, { "--project_root", try_root }) 90 | end 91 | logger.info("CopilotChat ctx provider called with the following args: ", args) 92 | local result, err = run_job(args, opts.use_lsp, source.bufnr) 93 | if utils.empty(result) and err then 94 | error(utils.make_string(err)) 95 | end 96 | 97 | utils.schedule_main() 98 | return vim.tbl_map(function(item) 99 | return { 100 | content = item.document, 101 | filename = item.path, 102 | filetype = utils.filetype(item.path), 103 | } 104 | end, result) 105 | end, 106 | } 107 | end) 108 | 109 | return { 110 | make_context_provider = make_context_provider, 111 | } 112 | -------------------------------------------------------------------------------- /src/vectorcode/main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | import traceback 6 | 7 | import httpx 8 | 9 | from vectorcode import __version__ 10 | from vectorcode.cli_utils import ( 11 | CliAction, 12 | config_logging, 13 | find_project_root, 14 | get_project_config, 15 | parse_cli_args, 16 | ) 17 | from vectorcode.common import ClientManager 18 | 19 | logger = logging.getLogger(name=__name__) 20 | 21 | 22 | async def async_main(): 23 | cli_args = await parse_cli_args() 24 | if cli_args.no_stderr: 25 | sys.stderr = open(os.devnull, "w") 26 | 27 | if cli_args.debug: 28 | from vectorcode import debugging 29 | 30 | debugging.enable() 31 | 32 | logger.info("Collected CLI arguments: %s", cli_args) 33 | 34 | if cli_args.project_root is None: 35 | cwd = os.getcwd() 36 | cli_args.project_root = ( 37 | find_project_root(cwd, ".vectorcode") 38 | or find_project_root(cwd, ".git") 39 | or cwd 40 | ) 41 | 42 | logger.info(f"Project root is set to {cli_args.project_root}") 43 | 44 | try: 45 | final_configs = await ( 46 | await get_project_config(cli_args.project_root) 47 | ).merge_from(cli_args) 48 | except IOError as e: 49 | traceback.print_exception(e, file=sys.stderr) 50 | return 1 51 | 52 | logger.info("Final configuration has been built: %s", final_configs) 53 | 54 | match cli_args.action: 55 | case CliAction.check: 56 | from vectorcode.subcommands import check 57 | 58 | return await check(cli_args) 59 | case CliAction.init: 60 | from vectorcode.subcommands import init 61 | 62 | return await init(cli_args) 63 | case CliAction.version: 64 | print(__version__) 65 | return 0 66 | case CliAction.prompts: 67 | from vectorcode.subcommands import prompts 68 | 69 | return prompts(cli_args) 70 | case CliAction.chunks: 71 | from vectorcode.subcommands import chunks 72 | 73 | return await chunks(final_configs) 74 | 75 | if final_configs.pipe: # pragma: nocover 76 | # NOTE: NNCF (intel GPU acceleration for sentence transformer) keeps showing logs. 77 | # This disables logs below ERROR so that it doesn't hurt the `pipe` output. 
78 | logging.disable(logging.ERROR - 1) 79 | 80 | return_val = 0 81 | try: 82 | match final_configs.action: 83 | case CliAction.query: 84 | from vectorcode.subcommands import query 85 | 86 | return_val = await query(final_configs) 87 | case CliAction.vectorise: 88 | from vectorcode.subcommands import vectorise 89 | 90 | return_val = await vectorise(final_configs) 91 | case CliAction.drop: 92 | from vectorcode.subcommands import drop 93 | 94 | return_val = await drop(final_configs) 95 | case CliAction.ls: 96 | from vectorcode.subcommands import ls 97 | 98 | return_val = await ls(final_configs) 99 | case CliAction.update: 100 | from vectorcode.subcommands import update 101 | 102 | return_val = await update(final_configs) 103 | case CliAction.clean: 104 | from vectorcode.subcommands import clean 105 | 106 | return_val = await clean(final_configs) 107 | case CliAction.files: 108 | from vectorcode.subcommands import files 109 | 110 | return_val = await files(final_configs) 111 | except Exception as e: 112 | return_val = 1 113 | if isinstance(e, httpx.RemoteProtocolError): # pragma: nocover 114 | e.add_note( 115 | f"Please verify that {final_configs.db_url} is a working chromadb server." 116 | ) 117 | logger.error(traceback.format_exc()) 118 | finally: 119 | await ClientManager().kill_servers() 120 | return return_val 121 | 122 | 123 | def main(): # pragma: nocover 124 | config_logging("vectorcode") 125 | return asyncio.run(async_main()) 126 | 127 | 128 | if __name__ == "__main__": # pragma: nocover 129 | sys.exit(main()) 130 | -------------------------------------------------------------------------------- /lua/vectorcode/jobrunner/lsp.lua: -------------------------------------------------------------------------------- 1 | local vc_config = require("vectorcode.config") 2 | 3 | ---@type VectorCode.JobRunner 4 | local jobrunner = {} 5 | 6 | ---@type vim.lsp.Client 7 | local CLIENT = nil 8 | 9 | local notify_opts = vc_config.notify_opts 10 | local logger = vc_config.logger 11 | 12 | --- Returns the Client ID if applicable, or `nil` if the language server fails to start 13 | ---@param ok_to_fail boolean 14 | ---@return integer? 15 | function jobrunner.init(ok_to_fail) 16 | local existing_clients = vim.lsp.get_clients({ name = vc_config.lsp_configs().name }) 17 | if #existing_clients > 0 then 18 | CLIENT = existing_clients[1] 19 | return CLIENT.id 20 | end 21 | if ok_to_fail == nil then ok_to_fail = true end 22 | local client_id = vim.lsp.start(vc_config.lsp_configs(), {}) 23 | if client_id ~= nil then 24 | -- server started 25 | CLIENT = vim.lsp.get_client_by_id(client_id) --[[@as vim.lsp.Client]] 26 | else 27 | -- failed to start server 28 | if vc_config.get_user_config().notify or not ok_to_fail then 29 | local message = "Failed to start vectorcode-server due to some error."
30 | logger.error(message) 31 | vim.schedule(function() 32 | vim.notify(message, vim.log.levels.ERROR, notify_opts) 33 | end) 34 | end 35 | return nil 36 | end 37 | return client_id 38 | end 39 | 40 | function jobrunner.run(args, timeout_ms, bufnr) 41 | jobrunner.init(false) 42 | assert(CLIENT ~= nil, "Failed to initialize the LSP server!") 43 | assert(bufnr ~= nil, "Need to pass the buffer number!") 44 | if timeout_ms == nil or timeout_ms < 0 then 45 | timeout_ms = 2 ^ 31 - 1 46 | end 47 | args = require("vectorcode.jobrunner").find_root(args, bufnr) 48 | 49 | local result, err, code 50 | jobrunner.run_async(args, function(res, e, e_code) 51 | result = res 52 | err = e 53 | code = e_code 54 | end, bufnr) 55 | vim.wait(timeout_ms, function() 56 | return (result ~= nil) or (err ~= nil) 57 | end) 58 | return result or {}, err, code 59 | end 60 | 61 | function jobrunner.run_async(args, callback, bufnr) 62 | assert(jobrunner.init(false)) 63 | assert(bufnr ~= nil, "Need to pass the buffer number!") 64 | if not CLIENT.attached_buffers[bufnr] then 65 | if vim.lsp.buf_attach_client(bufnr, CLIENT.id) then 66 | local uri = vim.uri_from_bufnr(bufnr) 67 | local text = vim.api.nvim_buf_get_lines(bufnr, 0, -1, true) 68 | vim.schedule_wrap(CLIENT.notify)(vim.lsp.protocol.Methods.textDocument_didOpen, { 69 | textDocument = { 70 | uri = uri, 71 | text = text, 72 | version = 1, 73 | languageId = vim.bo[bufnr].filetype, 74 | }, 75 | }) 76 | else 77 | local message = "Failed to attach lsp client" 78 | vim.schedule(function() 79 | vim.notify(message) 80 | end) 81 | logger.warn(message) 82 | end 83 | end 84 | args = require("vectorcode.jobrunner").find_root(args, bufnr) 85 | logger.debug( 86 | ("lsp jobrunner for buffer %s args: %s"):format(bufnr, vim.inspect(args)) 87 | ) 88 | local _, id = CLIENT:request( 89 | vim.lsp.protocol.Methods.workspace_executeCommand, 90 | -- NOTE: This is not a hardcoded executable, but rather part of our LSP implementation. 
91 | { command = "vectorcode", arguments = args }, 92 | function(err, result, _, _) 93 | if type(callback) == "function" then 94 | local err_message = {} 95 | if err ~= nil and err.message ~= nil then 96 | err_message = { err.message } 97 | end 98 | local code = 0 99 | if err and err.code then 100 | code = err.code 101 | end 102 | vim.schedule_wrap(callback)(result, err_message, code) 103 | if result then 104 | logger.debug("lsp jobrunner result:\n", result) 105 | end 106 | if err then 107 | logger.info("lsp jobrunner error:\n", err) 108 | end 109 | end 110 | end, 111 | bufnr 112 | ) 113 | return id 114 | end 115 | 116 | function jobrunner.is_job_running(job_handler) 117 | jobrunner.init(true) 118 | if CLIENT ~= nil then 119 | local request_data = CLIENT.requests[job_handler] 120 | return request_data ~= nil and request_data.type == "pending" 121 | end 122 | return false 123 | end 124 | 125 | function jobrunner.stop_job(job_handler) 126 | jobrunner.init(true) 127 | if CLIENT ~= nil then 128 | CLIENT:cancel_request(job_handler) 129 | end 130 | end 131 | 132 | return jobrunner 133 | -------------------------------------------------------------------------------- /tests/subcommands/test_update.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, patch 2 | 3 | import pytest 4 | from chromadb.api.types import IncludeEnum 5 | from chromadb.errors import InvalidCollectionException 6 | 7 | from vectorcode.cli_utils import Config 8 | from vectorcode.subcommands.update import update 9 | 10 | 11 | @pytest.mark.asyncio 12 | async def test_update_success(): 13 | mock_client = AsyncMock() 14 | mock_collection = AsyncMock() 15 | mock_collection.get.return_value = { 16 | "metadatas": [{"path": "file1.py"}, {"path": "file2.py"}] 17 | } 18 | mock_collection.delete = AsyncMock() 19 | mock_client.get_max_batch_size.return_value = 100 20 | 21 | with ( 22 | patch("vectorcode.subcommands.update.ClientManager"), 23 | patch( 24 | "vectorcode.subcommands.update.get_collection", return_value=mock_collection 25 | ), 26 | patch("vectorcode.subcommands.update.verify_ef", return_value=True), 27 | patch("os.path.isfile", return_value=True), 28 | patch( 29 | "vectorcode.subcommands.update.chunked_add", new_callable=AsyncMock 30 | ) as mock_chunked_add, 31 | patch("vectorcode.subcommands.update.show_stats"), 32 | ): 33 | config = Config(project_root="/test/project", pipe=False) 34 | result = await update(config) 35 | 36 | assert result == 0 37 | mock_collection.get.assert_called_once_with(include=[IncludeEnum.metadatas]) 38 | assert mock_chunked_add.call_count == 2 39 | mock_collection.delete.assert_not_called() 40 | 41 | 42 | @pytest.mark.asyncio 43 | async def test_update_with_orphans(): 44 | mock_client = AsyncMock() 45 | mock_collection = AsyncMock() 46 | mock_collection.get.return_value = { 47 | "metadatas": [{"path": "file1.py"}, {"path": "file2.py"}, {"path": "orphan.py"}] 48 | } 49 | mock_collection.delete = AsyncMock() 50 | mock_client.get_max_batch_size.return_value = 100 51 | 52 | with ( 53 | patch("vectorcode.subcommands.update.ClientManager"), 54 | patch( 55 | "vectorcode.subcommands.update.get_collection", return_value=mock_collection 56 | ), 57 | patch("vectorcode.subcommands.update.verify_ef", return_value=True), 58 | patch("os.path.isfile", side_effect=[True, True, False]), 59 | patch( 60 | "vectorcode.subcommands.update.chunked_add", new_callable=AsyncMock 61 | ) as mock_chunked_add, 62 | 
patch("vectorcode.subcommands.update.show_stats"), 63 | ): 64 | config = Config(project_root="/test/project", pipe=False) 65 | result = await update(config) 66 | 67 | assert result == 0 68 | mock_collection.get.assert_called_once_with(include=[IncludeEnum.metadatas]) 69 | assert mock_chunked_add.call_count == 2 70 | mock_collection.delete.assert_called_once_with( 71 | where={"path": {"$in": ["orphan.py"]}} 72 | ) 73 | 74 | 75 | @pytest.mark.asyncio 76 | async def test_update_index_error(): 77 | mock_client = AsyncMock() 78 | # mock_collection = AsyncMock() 79 | 80 | with ( 81 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 82 | patch("vectorcode.subcommands.update.get_collection", side_effect=IndexError), 83 | patch("sys.stderr"), 84 | ): 85 | MockClientManager.return_value._create_client.return_value = mock_client 86 | config = Config(project_root="/test/project", pipe=False) 87 | result = await update(config) 88 | 89 | assert result == 1 90 | 91 | 92 | @pytest.mark.asyncio 93 | async def test_update_value_error(): 94 | mock_client = AsyncMock() 95 | # mock_collection = AsyncMock() 96 | 97 | with ( 98 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 99 | patch("vectorcode.subcommands.update.get_collection", side_effect=ValueError), 100 | patch("sys.stderr"), 101 | ): 102 | MockClientManager.return_value._create_client.return_value = mock_client 103 | config = Config(project_root="/test/project", pipe=False) 104 | result = await update(config) 105 | 106 | assert result == 1 107 | 108 | 109 | @pytest.mark.asyncio 110 | async def test_update_invalid_collection_exception(): 111 | mock_client = AsyncMock() 112 | # mock_collection = AsyncMock() 113 | 114 | with ( 115 | patch("vectorcode.subcommands.update.ClientManager") as MockClientManager, 116 | patch( 117 | "vectorcode.subcommands.update.get_collection", 118 | side_effect=InvalidCollectionException, 119 | ), 120 | patch("sys.stderr"), 121 | ): 122 | MockClientManager.return_value._create_client.return_value = mock_client 123 | config = Config(project_root="/test/project", pipe=False) 124 | result = await update(config) 125 | 126 | assert result == 1 127 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/prompts/init.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local vc_config = require("vectorcode.config") 4 | 5 | local utils = require("vectorcode.utils") 6 | 7 | ---@param path string[]|string path to files or wildcards. 8 | ---@param project_root? string 9 | ---@param callback? VectorCode.JobRunner.Callback 10 | function M.vectorise_files(path, project_root, callback) 11 | if type(path) == "string" then 12 | path = { path } 13 | end 14 | assert(not vim.tbl_isempty(path), "`path` cannot be empty") 15 | 16 | local jobrunner = 17 | require("vectorcode.integrations.codecompanion.common").initialise_runner( 18 | vc_config.get_user_config().async_backend == "lsp" 19 | ) 20 | 21 | local args = { "vectorise", "--pipe" } 22 | if project_root then 23 | vim.list_extend(args, { "--project_root", project_root }) 24 | end 25 | vim.list_extend(args, path) 26 | jobrunner.run_async(args, function(result, error, code, signal) 27 | if type(callback) == "function" then 28 | callback(result, error, code, signal) 29 | end 30 | end, 0) 31 | end 32 | 33 | ---@class VectorCode.CodeCompanion.PromptFactory.Opts 34 | ---@field name string? 
human-readable name of this prompt 35 | ---@field project_root string|(fun():string) project_root of the files to be added to the database 36 | ---Paths to the files in the local directory to be added to the database. 37 | --- 38 | ---These should either be absolute paths, or relative to the project root. 39 | ---@field file_patterns string[]|(fun():string[]) 40 | ---See https://codecompanion.olimorris.dev/extending/prompts.html#recipe-2-using-context-in-your-prompts 41 | --- 42 | ---Note: If a system prompt is set here, your default chat system prompt will be ignored. 43 | ---@field system_prompt? string|fun(context:table):string 44 | ---This contains some preliminary messages (filled into the chat buffer) that tells the LLM about the task. 45 | ---If you're overwriting the default message, make sure to include the tool (`@{vectorcode_query}`). 46 | --- 47 | ---See https://codecompanion.olimorris.dev/extending/prompts.html#recipe-2-using-context-in-your-prompts 48 | ---@field user_prompt? string|fun(context:table):string 49 | 50 | ---@param opts VectorCode.CodeCompanion.PromptFactory.Opts 51 | function M.register_prompt(opts) 52 | opts = vim.deepcopy(opts) 53 | 54 | if type(opts.file_patterns) == "function" then 55 | opts.file_patterns = opts.file_patterns() 56 | end 57 | 58 | assert( 59 | ---@diagnostic disable-next-line: param-type-mismatch 60 | type(opts.project_root) == "string" and utils.is_directory(opts.project_root), 61 | string.format("`%s` is not a valid directory.", opts.project_root) 62 | ) 63 | assert( 64 | ---@diagnostic disable-next-line: param-type-mismatch 65 | opts.file_patterns ~= nil and (not vim.tbl_isempty(opts.file_patterns)), 66 | "Recieved empty path specs." 67 | ) 68 | 69 | assert(type(opts.name) == "string", "`name` cannot be `nil`.") 70 | 71 | local constants = require("codecompanion.config").config.constants 72 | local prompts = {} 73 | 74 | if opts.system_prompt then 75 | table.insert( 76 | prompts, 77 | { role = constants.SYSTEM_ROLE, content = opts.system_prompt } 78 | ) 79 | end 80 | table.insert(prompts, #prompts + 1, { 81 | role = constants.USER_ROLE, 82 | content = opts.user_prompt 83 | or string.format( 84 | [[I have some questions about the documents under the `%s` directory. 85 | The files have been added to the database and can be searched by calling the @{vectorcode_query} tool. 86 | When you call the tool, use `%s` as the value for the argument `project_root`. 87 | Use the information returned by the tool to answer my questions, and cite the sources when appropriate. 88 | If you need more information, call the tool with different search keywords or ask for more context and/or tools. 
89 | 90 | Here's my question: 91 | 92 | - ]], 93 | opts.project_root, 94 | opts.project_root 95 | ), 96 | }) 97 | return { 98 | name = opts.name, 99 | strategy = "chat", 100 | opts = { 101 | ignore_system_prompt = opts.system_prompt ~= nil, 102 | pre_hook = function() 103 | if vc_config.get_user_config().notify then 104 | vim.notify( 105 | string.format("Adding files under `%s` to the database.", opts.project_root), 106 | vim.log.levels.INFO, 107 | vc_config.notify_opts 108 | ) 109 | end 110 | M.vectorise_files( 111 | vim 112 | .iter(opts.file_patterns) 113 | :map(function(p) 114 | if vim.fn.isabsolutepath(p) == 1 then 115 | return p 116 | else 117 | return vim.fs.joinpath(opts.project_root, p) 118 | end 119 | end) 120 | :totable(), 121 | opts.project_root, 122 | function(result, err, _, _) 123 | if result ~= nil and not vim.tbl_isempty(result) then 124 | vim.schedule_wrap(vim.notify)( 125 | string.format( 126 | "Vectorised %d new files.", 127 | result.add or 0, 128 | opts.project_root 129 | ), 130 | vim.log.levels.INFO, 131 | vc_config.notify_opts 132 | ) 133 | elseif err ~= nil then 134 | err = utils.flatten_table_to_string(err, "Unknown error.") 135 | vim.schedule_wrap(vim.notify)( 136 | err, 137 | vim.log.levels.WARN, 138 | vc_config.notify_opts 139 | ) 140 | end 141 | end 142 | ) 143 | end, 144 | }, 145 | prompts = prompts, 146 | } 147 | end 148 | return M 149 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/init.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import logging 3 | import os 4 | import platform 5 | import re 6 | import shutil 7 | import stat 8 | from pathlib import Path 9 | from typing import Optional 10 | 11 | from vectorcode.cli_utils import GLOBAL_CONFIG_DIR, Config, find_project_root 12 | 13 | logger = logging.getLogger(name=__name__) 14 | 15 | __GLOBAL_HOOKS_PATH = Path(GLOBAL_CONFIG_DIR) / "hooks" 16 | 17 | 18 | # Keys: name of the hooks, ie. `pre-commit` 19 | # Values: lines of the hooks. 20 | __HOOK_CONTENTS: dict[str, list[str]] = { 21 | "pre-commit": [ 22 | "diff_files=$(git diff --cached --name-only)", 23 | 'if [ -d ".vectorcode" ] && [ ! -z "$diff_files" ]; then', 24 | " vectorcode vectorise $diff_files", 25 | "fi", 26 | ], 27 | "post-checkout": [ 28 | 'if [ -z "$(echo $1|grep [^0])" ]; then', 29 | ' files=""', 30 | " ( [ -f .vectorcode/vectorcode.include ] || [ -f ~/.config/vectorcode/vectorcode.include ] ) && vectorcode vectorise || true", 31 | "else", 32 | ' files=$(git diff --name-only "$1" "$2")', 33 | "fi", 34 | 'if [ -d ".vectorcode" ] && [ ! 
-z "$files" ]; then', 35 | " vectorcode vectorise $files", 36 | "fi", 37 | ], 38 | } 39 | 40 | 41 | def __lines_are_empty(lines: list[str]) -> bool: 42 | pattern = re.compile(r"^\s*$") 43 | if len(lines) == 0: 44 | return True 45 | return all(map(lambda line: pattern.match(line) is not None, lines)) 46 | 47 | 48 | def load_hooks(): 49 | global __HOOK_CONTENTS 50 | for file in glob.glob(str(__GLOBAL_HOOKS_PATH / "*")): 51 | hook_name = Path(file).stem 52 | with open(file) as fin: 53 | lines = fin.readlines() 54 | if not __lines_are_empty(lines): 55 | __HOOK_CONTENTS[hook_name] = lines 56 | 57 | 58 | class HookFile: 59 | prefix = "# VECTORCODE_HOOK_START" 60 | suffix = "# VECTORCODE_HOOK_END" 61 | prefix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_START\s*") 62 | suffix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_END\s*") 63 | 64 | def __init__(self, path: str | Path, git_dir: Optional[str | Path] = None): 65 | self.path = path 66 | self.lines: list[str] = [] 67 | if os.path.isfile(self.path): 68 | with open(self.path) as fin: 69 | self.lines.extend(fin.readlines()) 70 | 71 | def has_vectorcode_hooks(self, force: bool = False) -> bool: 72 | for start, start_line in enumerate(self.lines): 73 | if self.prefix_pattern.match(start_line) is None: 74 | continue 75 | 76 | for end in range(start + 1, len(self.lines)): 77 | if self.suffix_pattern.match(self.lines[end]) is not None: 78 | if force: 79 | logger.debug("`force` cleaning existing VectorCode hooks...") 80 | new_lines = self.lines[:start] + self.lines[end + 1 :] 81 | self.lines[:] = new_lines 82 | return False 83 | logger.debug( 84 | f"Found vectorcode hook block between line {start} and {end} in {self.path}:\n{''.join(self.lines[start + 1 : end])}" 85 | ) 86 | return True 87 | 88 | return False 89 | 90 | def inject_hook(self, content: list[str], force: bool = False): 91 | if len(self.lines) == 0 or not self.has_vectorcode_hooks(force): 92 | self.lines.append(self.prefix + "\n") 93 | self.lines.extend(i if i.endswith("\n") else i + "\n" for i in content) 94 | self.lines.append(self.suffix + "\n") 95 | with open(self.path, "w") as fin: 96 | if os.path.islink(self.path): # pragma: nocover 97 | logger.warning(f"{self.path} is a symlink.") 98 | fin.writelines(self.lines) 99 | if platform.system() != "Windows": 100 | # for unix systems, set the executable bit. 
101 | curr_mode = os.stat(self.path).st_mode 102 | os.chmod(self.path, mode=curr_mode | stat.S_IXUSR) 103 | 104 | 105 | async def init(configs: Config) -> int: 106 | assert configs.project_root is not None 107 | project_config_dir = os.path.join(str(configs.project_root), ".vectorcode") 108 | is_initialised = 0 109 | if os.path.isdir(project_config_dir) and not configs.force: 110 | logger.warning( 111 | f"{configs.project_root} is already initialised for VectorCode.", 112 | ) 113 | is_initialised = 1 114 | else: 115 | os.makedirs(project_config_dir, exist_ok=True) 116 | for item in ( 117 | "config.json5", 118 | "config.json", 119 | "vectorcode.include", 120 | "vectorcode.exclude", 121 | ): 122 | local_file_path = os.path.join(project_config_dir, item) 123 | global_file_path = os.path.join( 124 | os.path.expanduser("~"), ".config", "vectorcode", item 125 | ) 126 | if os.path.isfile(global_file_path): 127 | logger.debug(f"Copying global {item} to {project_config_dir}") 128 | shutil.copyfile(global_file_path, local_file_path) 129 | 130 | print(f"VectorCode project root has been initialised at {configs.project_root}") 131 | print( 132 | "Note: The collection in the database will not be created until you vectorise a file." 133 | ) 134 | 135 | git_root = find_project_root(configs.project_root, ".git") 136 | if git_root: 137 | load_hooks() 138 | for hook in __HOOK_CONTENTS.keys(): 139 | hook_file_path = os.path.join(git_root, ".git", "hooks", hook) 140 | logger.info(f"Writing {hook} hook into {hook_file_path}.") 141 | print(f"Processing {hook} hook...") 142 | hook_obj = HookFile(hook_file_path, git_dir=git_root) 143 | hook_obj.inject_hook(__HOOK_CONTENTS[hook], configs.force) 144 | 145 | return is_initialised 146 | -------------------------------------------------------------------------------- /lua/vectorcode/integrations/codecompanion/vectorise_tool.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | local cc_common = require("vectorcode.integrations.codecompanion.common") 4 | local vc_config = require("vectorcode.config") 5 | local utils = require("vectorcode.utils") 6 | local logger = vc_config.logger 7 | 8 | ---@alias VectoriseToolArgs { paths: string[], project_root: string? 
} 9 | 10 | ---@alias VectoriseResult { add: integer, update: integer, removed: integer } 11 | 12 | ---@type VectorCode.CodeCompanion.VectoriseToolOpts 13 | local default_vectorise_options = { 14 | use_lsp = vc_config.get_user_config().async_backend == "lsp", 15 | } 16 | 17 | ---@param opts VectorCode.CodeCompanion.VectoriseToolOpts|{}|nil 18 | ---@return VectorCode.CodeCompanion.VectoriseToolOpts 19 | local get_vectorise_tool_opts = function(opts) 20 | opts = vim.tbl_deep_extend("force", default_vectorise_options, opts or {}) 21 | logger.info( 22 | string.format( 23 | "Loading `vectorcode_vectorise` with the following opts:\n%s", 24 | vim.inspect(opts) 25 | ) 26 | ) 27 | return opts 28 | end 29 | 30 | ---@param opts VectorCode.CodeCompanion.VectoriseToolOpts|{}|nil 31 | ---@return CodeCompanion.Tools 32 | return function(opts) 33 | opts = get_vectorise_tool_opts(opts) 34 | local tool_name = "vectorcode_vectorise" 35 | local job_runner = cc_common.initialise_runner(opts.use_lsp) 36 | 37 | ---@type CodeCompanion.Tools|{} 38 | return { 39 | name = tool_name, 40 | schema = { 41 | type = "function", 42 | ["function"] = { 43 | name = tool_name, 44 | description = [[ 45 | Vectorise files in a project so that they'll be available from the `vectorcode_query` tool. 46 | The paths should be accurate (DO NOT ASSUME A PATH EXISTS) and are case-sensitive. 47 | ]], 48 | parameters = { 49 | type = "object", 50 | properties = { 51 | paths = { 52 | type = "array", 53 | items = { type = "string" }, 54 | description = "Paths to the files to be vectorised. DO NOT use directories for this parameter. You may use wildcards here if the user instructed to do so.", 55 | }, 56 | project_root = { 57 | type = "string", 58 | description = [[ 59 | The project that the files belong to. 60 | The value should be one of the following: 61 | - One of the paths from the `vectorcode_ls` tool; 62 | - User input; 63 | - `null` (omit this parameter), which means the current project, if found. 64 | ]], 65 | }, 66 | }, 67 | required = { "paths" }, 68 | }, 69 | }, 70 | }, 71 | cmds = { 72 | ---@param tools CodeCompanion.Tools 73 | ---@param action VectoriseToolArgs 74 | ---@return nil|{ status: string, data: string } 75 | function(tools, action, _, cb) 76 | local args = { "vectorise", "--pipe" } 77 | action = utils.fix_nil(action) 78 | if action.project_root then 79 | local project_root = vim.fs.abspath(vim.fs.normalize(action.project_root)) 80 | if utils.is_directory(project_root) then 81 | vim.list_extend(args, { "--project_root", project_root }) 82 | else 83 | return { status = "error", data = "Invalid path " ..
project_root } 84 | end 85 | end 86 | if 87 | vim.iter(action.paths):any(function(p) 88 | return utils.is_directory(p) 89 | end) 90 | then 91 | return { 92 | status = "error", 93 | data = "Please only supply paths to files as the `paths` parameter, not directories.", 94 | } 95 | end 96 | 97 | vim.list_extend(args, action.paths) 98 | job_runner.run_async( 99 | args, 100 | ---@param result VectoriseResult 101 | function(result, error, code, _) 102 | if result then 103 | cb({ status = "success", data = result }) 104 | else 105 | cb({ status = "error", data = { error = error, code = code } }) 106 | end 107 | end, 108 | tools.chat.bufnr 109 | ) 110 | end, 111 | }, 112 | output = { 113 | ---@param self CodeCompanion.Tools.Tool 114 | prompt = function(self, _) 115 | return string.format("Vectorise %d files with VectorCode?", #self.args.paths) 116 | end, 117 | ---@param self CodeCompanion.Tools.Tool 118 | ---@param tools CodeCompanion.Tools 119 | ---@param cmd VectoriseToolArgs 120 | error = function(self, tools, cmd, stderr) 121 | logger.error( 122 | ("CodeCompanion tool with command %s thrown with the following error: %s"):format( 123 | vim.inspect(cmd), 124 | vim.inspect(stderr) 125 | ) 126 | ) 127 | stderr = utils.flatten_table_to_string(stderr, "Unknown error.") 128 | tools.chat:add_tool_output( 129 | self, 130 | string.format("**VectorCode `vectorise` Tool: %s", stderr) 131 | ) 132 | end, 133 | ---@param self CodeCompanion.Tools.Tool 134 | ---@param tools CodeCompanion.Tools 135 | ---@param cmd VectoriseToolArgs 136 | ---@param stdout VectorCode.VectoriseResult[] 137 | success = function(self, tools, cmd, stdout) 138 | stdout = stdout[#stdout] 139 | tools.chat:add_tool_output( 140 | self, 141 | string.format( 142 | [[**VectorCode `vectorise` Tool**: 143 | - New files added: %d 144 | - Existing files updated: %d 145 | - Orphaned files removed: %d 146 | - Up-to-date files skipped: %d 147 | - Failed to decode: %d 148 | ]], 149 | stdout.add or 0, 150 | stdout.update or 0, 151 | stdout.removed or 0, 152 | stdout.skipped or 0, 153 | stdout.failed or 0 154 | ) 155 | ) 156 | if cmd.project_root and cmd.project_root then 157 | tools.chat:add_tool_output( 158 | self, 159 | string.format("\nThe files were added to `%s`", cmd.project_root), 160 | "" 161 | ) 162 | end 163 | end, 164 | }, 165 | } 166 | end 167 | -------------------------------------------------------------------------------- /lua/vectorcode/utils.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local function traverse(node, cb) 4 | if node == nil then 5 | return 6 | end 7 | if node.result ~= nil then 8 | traverse(node.result, cb) 9 | end 10 | if vim.isarray(node) then 11 | for _, v in pairs(node) do 12 | traverse(v, cb) 13 | end 14 | return 15 | end 16 | if vim.isarray(node.children) then 17 | for _, v in pairs(node.children) do 18 | traverse(v, cb) 19 | end 20 | end 21 | if not vim.list_contains({ 15, 16, 20, 21, 25 }, node.kind) then 22 | -- exclude certain kinds. 23 | if cb then 24 | cb(node) 25 | end 26 | end 27 | end 28 | 29 | ---@alias VectorCode.QueryCallback fun(bufnr:integer?):string|string[] 30 | 31 | ---Retrieves all LSP document symbols from the current buffer, and use the symbols 32 | ---as query messages. Fallbacks to `make_surrounding_lines_cb` if 33 | ---`textDocument_documentSymbol` is not accessible. 
34 | ---@return VectorCode.QueryCallback 35 | function M.make_lsp_document_symbol_cb() 36 | return function(bufnr) 37 | if bufnr == 0 or bufnr == nil then 38 | bufnr = vim.api.nvim_get_current_buf() 39 | end 40 | local has_documentSymbol = false 41 | for _, client in ipairs(vim.lsp.get_clients({ bufnr = bufnr })) do 42 | if client.server_capabilities.documentSymbolProvider then 43 | has_documentSymbol = true 44 | end 45 | end 46 | if not has_documentSymbol then 47 | return M.make_surrounding_lines_cb(-1)(bufnr) 48 | end 49 | 50 | local result, _ = vim.lsp.buf_request_sync( 51 | 0, 52 | vim.lsp.protocol.Methods.textDocument_documentSymbol, 53 | { textDocument = vim.lsp.util.make_text_document_params(bufnr) } 54 | ) 55 | if result ~= nil then 56 | local symbols = {} 57 | traverse(result, function(node) 58 | if node.name ~= nil then 59 | vim.list_extend(symbols, { node.name }) 60 | end 61 | end) 62 | return symbols 63 | else 64 | return M.make_surrounding_lines_cb(20)(bufnr) 65 | end 66 | end 67 | end 68 | 69 | ---Use the lines above and below the current line as the query messages. 70 | ---@param num_of_lines integer The number of lines to include in the query. 71 | ---@return VectorCode.QueryCallback 72 | function M.make_surrounding_lines_cb(num_of_lines) 73 | return function(bufnr) 74 | if bufnr == 0 or bufnr == nil then 75 | bufnr = vim.api.nvim_get_current_buf() 76 | end 77 | if num_of_lines <= 0 then 78 | return table.concat(vim.api.nvim_buf_get_lines(bufnr, 0, -1, false), "\n") 79 | end 80 | local cursor_line = vim.api.nvim_win_get_cursor(0)[1] 81 | local start_line = cursor_line - math.floor(num_of_lines / 2) 82 | if start_line < 1 then 83 | start_line = 1 84 | end 85 | return table.concat( 86 | vim.api.nvim_buf_get_lines( 87 | bufnr, 88 | start_line - 1, 89 | start_line + num_of_lines - 1, 90 | false 91 | ), 92 | "\n" 93 | ) 94 | end 95 | end 96 | 97 | ---@param path string|integer 98 | ---@return string? 99 | function M.find_root(path) 100 | return vim.fs.root(path, ".vectorcode") or vim.fs.root(path, ".git") 101 | end 102 | 103 | ---@param str string 104 | ---@param sep string? 105 | ---@return string[] 106 | local function split(str, sep) 107 | if sep == nil then 108 | sep = " " 109 | end 110 | local result = {} 111 | local pattern = "([^" .. sep .. "]+)" 112 | for part in string.gmatch(str, pattern) do 113 | table.insert(result, part) 114 | end 115 | return result 116 | end 117 | 118 | --- This function build a `VectorCode.QueryCallback` by extracting recent changes from the `:changes` command. 119 | ---@param max_num integer? Default is 50 120 | ---@return VectorCode.QueryCallback 121 | function M.make_changes_cb(max_num) 122 | if max_num == nil then 123 | max_num = 50 124 | end 125 | return function(bufnr) 126 | ---@type string? 
127 | local raw_changes = vim.api.nvim_exec2("changes", { output = true }).output 128 | if raw_changes == nil then 129 | -- fallback to other cb 130 | return M.make_surrounding_lines_cb(-1)(bufnr) 131 | end 132 | local lines = vim.tbl_map(function(s) 133 | local res = string.gsub(s, "^[%d%s]+", "") 134 | return res 135 | end, split(raw_changes, "\n")) 136 | local results = {} 137 | local seen = {} -- deduplicate 138 | for i = #lines - 1, 2, -1 do 139 | if #results <= max_num then 140 | if not seen[lines[i]] then 141 | table.insert(results, lines[i]) 142 | seen[lines[i]] = true 143 | end 144 | else 145 | break 146 | end 147 | end 148 | if #results == 0 then 149 | -- fallback to other cb 150 | return M.make_surrounding_lines_cb(-1)(bufnr) 151 | end 152 | return results 153 | end 154 | end 155 | 156 | ---@param f string 157 | ---@return boolean 158 | function M.is_file(f) 159 | if type(f) ~= "string" then 160 | return false 161 | end 162 | local stats = vim.uv.fs_stat(f) 163 | return stats and (stats.type == "file") or false 164 | end 165 | 166 | ---@param f string 167 | ---@return boolean 168 | function M.is_directory(f) 169 | if type(f) ~= "string" then 170 | return false 171 | end 172 | local stats = vim.uv.fs_stat(f) 173 | return stats and (stats.type == "directory") or false 174 | end 175 | 176 | ---@param t table|string|nil 177 | ---@param fallback string? 178 | ---@return string 179 | M.flatten_table_to_string = function(t, fallback) 180 | fallback = fallback or "" 181 | if t == nil then 182 | return fallback 183 | end 184 | if type(t) == "string" then 185 | return t 186 | end 187 | 188 | -- Handle empty tables or tables with empty strings 189 | local flattened = vim 190 | .iter(t) 191 | :flatten(math.huge) 192 | :filter(function(item) 193 | return type(item) == "string" and vim.trim(item) ~= "" 194 | end) 195 | :totable() 196 | 197 | if #flattened == 0 then 198 | return fallback 199 | end 200 | 201 | return table.concat(flattened, "\n") 202 | end 203 | 204 | ---Convert any `vim.NIL` instances to `nil` in lua. 205 | ---@generic Obj: any 206 | ---@param obj Obj 207 | ---@return Obj 208 | function M.fix_nil(obj) 209 | if obj == vim.NIL then 210 | return nil 211 | end 212 | if type(obj) == "table" then 213 | for k, v in pairs(obj) do 214 | obj[k] = M.fix_nil(v) 215 | end 216 | end 217 | return obj 218 | end 219 | 220 | return M 221 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VectorCode 2 | 3 | [![codecov](https://codecov.io/github/Davidyz/VectorCode/branch/main/graph/badge.svg?token=TWXLOUGG66)](https://codecov.io/github/Davidyz/VectorCode) 4 | [![Test and Coverage](https://github.com/Davidyz/VectorCode/actions/workflows/test_and_cov.yml/badge.svg)](https://github.com/Davidyz/VectorCode/actions/workflows/test_and_cov.yml) 5 | [![pypi](https://img.shields.io/pypi/v/vectorcode.svg)](https://pypi.org/project/vectorcode/) 6 | 7 | VectorCode is a code repository indexing tool. It helps you build better prompt 8 | for your coding LLMs by indexing and providing information about the code 9 | repository you're working on. This repository also contains the corresponding 10 | neovim plugin that provides a set of APIs for you to build or enhance AI plugins, 11 | and integrations for some of the popular plugins. 12 | 13 | > [!NOTE] 14 | > This project is in beta quality and is undergoing rapid iterations. 
15 | > I know there is plenty of room for improvement, and any help is welcome. 16 | 17 |  18 |  19 | * [Why VectorCode?](#why-vectorcode) 20 | * [Documentation](#documentation) 21 | * [About Versioning](#about-versioning) 22 | * [TODOs](#todos) 23 | * [Credit](#credit) 24 | * [Special Thanks](#special-thanks) 25 | * [Star History](#star-history) 26 | 27 |  28 |  29 | ## Why VectorCode? 30 | LLMs usually have very limited understanding of closed-source projects, projects 31 | that are not well-known, and cutting-edge developments that have not made it into 32 | releases. Their capabilities on these projects are quite limited. With 33 | VectorCode, you can easily (and programmatically) inject task-relevant context 34 | from the project into the prompt. This significantly improves the quality of the 35 | model output and reduces hallucinations. 36 | 37 | [![asciicast](https://asciinema.org/a/8WP8QJHNAR9lEllZSSx3poLPD.svg)](https://asciinema.org/a/8WP8QJHNAR9lEllZSSx3poLPD?t=3) 38 | 39 | ## Documentation 40 | 41 | > [!NOTE] 42 | > The documentation on the `main` branch reflects the code on the latest commit. 43 | > To see the documentation for the version you're using, you can [check out 44 | > the corresponding tags](https://github.com/Davidyz/VectorCode/tags). 45 | 46 | - For the setup and usage of the command-line tool, see [the CLI documentation](./docs/cli.md); 47 | - For neovim users, after you've gone through the CLI documentation, please refer to 48 | [the neovim plugin documentation](./docs/neovim/README.md) (and optionally the [lua API reference](./docs/neovim/api_references.md)) 49 | for further instructions. 50 | - Additional resources: 51 | - the [wiki](https://github.com/Davidyz/VectorCode/wiki) for extra tricks and 52 | tips that will help you get the most out of VectorCode; 53 | - the [discussions](https://github.com/Davidyz/VectorCode/discussions) where 54 | you can ask general questions and share your cool use cases for VectorCode. 55 | - If you're feeling adventurous, feel free to check out 56 | [the pull requests](https://github.com/Davidyz/VectorCode/pulls) for 57 | WIP features. 58 | 59 | If you're trying to contribute to this project, take a look at [the contribution 60 | guide](./docs/CONTRIBUTING.md), which contains information about some basic 61 | guidelines that you should follow and tips that you may find helpful. 62 | 63 | ### About Versioning 64 | 65 | This project follows an adapted semantic versioning: 66 | 67 | - Until 1.0.0 is released, the _major version number_ stays at 0, which indicates that 68 | this project is still in an early stage, and features/interfaces may change from 69 | time to time; 70 | - The _minor version number_ indicates __breaking changes__. When I decide to remove a 71 | feature/config option, the actual removal will happen when I bump the minor 72 | version number. Therefore, if you want to avoid breaking a working setup, you 73 | may choose to use a version constraint like `"vectorcode<0.7.0"`; 74 | - The _patch version number_ indicates __non-breaking changes__. This can include new 75 | features and bug fixes. When I decide to deprecate things, I will make a new 76 | release with a bumped patch version. Until the minor version number is bumped, 77 | the deprecated feature will still work but you'll see a warning. It's 78 | recommended to update your setup to adopt the new features.
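For example, if you install the CLI from PyPI, the constraint above can be applied directly at install time. This is a minimal sketch assuming a plain `pip` install; the same constraint string works with `pipx` or any other standard Python dependency specification:

```sh
# Stay below the next breaking (minor) release of the CLI.
pip install "vectorcode<0.7.0"
```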
79 | 80 | ## TODOs 81 | - [x] query by ~file path~ excluded paths; 82 | - [x] chunking support; 83 | - [x] add metadata for files; 84 | - [x] chunk-size configuration; 85 | - [x] smarter chunking (semantics/syntax based), implemented with 86 | [py-tree-sitter](https://github.com/tree-sitter/py-tree-sitter) and 87 | [tree-sitter-language-pack](https://github.com/Goldziher/tree-sitter-language-pack); 88 | - [x] configurable document selection from query results. 89 | - [x] ~NeoVim Lua API with cache to skip the retrieval when a project has not 90 | been indexed~ Returns empty array instead; 91 | - [x] job pool for async caching; 92 | - [x] [persistent-client](https://docs.trychroma.com/docs/run-chroma/persistent-client); 93 | - [ ] proper [remote Chromadb](https://docs.trychroma.com/production/administration/auth) support (with authentication, etc.); 94 | - [x] respect `.gitignore`; 95 | - [x] implement some sort of project-root anchors (such as `.git` or a custom 96 | `.vectorcode.json`) that enhances automatic project-root detection. 97 | **Implemented project-level `.vectorcode/` and `.git` as root anchor** 98 | - [x] ability to view and delete files in a collection; 99 | - [x] joint search (kinda, using codecompanion.nvim/MCP); 100 | - [x] Nix support (unofficial packages [here](https://search.nixos.org/packages?channel=unstable&from=0&size=50&sort=relevance&type=packages&query=vectorcode)); 101 | - [ ] Query rewriting (#124). 102 | 103 | 104 | ## Credit 105 | 106 | - [@milanglacier](https://github.com/milanglacier) (and [minuet-ai.nvim](https://github.com/milanglacier/minuet-ai.nvim)) for the support when this project was still in early stage; 107 | - [@olimorris](https://github.com/olimorris) for the help (personally and 108 | from [codecompanion.nvim](https://github.com/olimorris/codecompanion.nvim)) 109 | when this project made initial attempts at tool-calling; 110 | - [@ravitemer](https://github.com/ravitemer) for the help to interface 111 | VectorCode with [MCP](https://modelcontextprotocol.io/introduction); 112 | - The nix community (especially [@sarahec](https://github.com/sarahec) and [@GaetanLepage](https://github.com/GaetanLepage)) 113 | for maintaining the nix packages. 
114 | 115 | ### Special Thanks 116 | [![JetBrains logo.](https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg)](https://jb.gg/OpenSource) 117 | 118 | ## Star History 119 | 120 | [![Star History Chart](https://api.star-history.com/svg?repos=Davidyz/VectorCode&type=Date)](https://www.star-history.com/#Davidyz/VectorCode&Date) 121 | -------------------------------------------------------------------------------- /lua/vectorcode/config.lua: -------------------------------------------------------------------------------- 1 | local log_level = os.getenv("VECTORCODE_NVIM_LOG_LEVEL") 2 | if log_level == nil then 3 | log_level = "error" 4 | else 5 | log_level = log_level:lower() 6 | end 7 | local logger = require("plenary.log").new({ 8 | plugin = "vectorcode.nvim", 9 | level = log_level, 10 | use_console = log_level ~= nil and "async" or false, 11 | use_file = log_level ~= nil, 12 | }) 13 | 14 | local cacher = nil 15 | 16 | ---@type VectorCode.Opts 17 | local config = { 18 | cli_cmds = { 19 | vectorcode = "vectorcode", 20 | }, 21 | async_opts = { 22 | debounce = 10, 23 | events = { "BufWritePost", "InsertEnter", "BufReadPost" }, 24 | exclude_this = true, 25 | n_query = 1, 26 | notify = false, 27 | query_cb = require("vectorcode.utils").make_surrounding_lines_cb(-1), 28 | run_on_register = false, 29 | single_job = false, 30 | timeout_ms = 5000, 31 | }, 32 | async_backend = "default", 33 | exclude_this = true, 34 | n_query = 1, 35 | notify = true, 36 | timeout_ms = 5000, 37 | on_setup = { update = false, lsp = false }, 38 | sync_log_env_var = false, 39 | } 40 | 41 | local setup_config = vim.deepcopy(config, true) 42 | 43 | ---@return vim.lsp.ClientConfig 44 | local lsp_configs = function() 45 | ---@type vim.lsp.ClientConfig 46 | local cfg = 47 | { cmd = { "vectorcode-server" }, root_markers = { ".vectorcode", ".git" } } -- NOTE: This can be overriden by `vim.lsp.config` 48 | if vim.lsp.config ~= nil and vim.lsp.config.vectorcode_server ~= nil then 49 | -- nvim >= 0.11.0 50 | cfg = vim.tbl_deep_extend("force", cfg, vim.lsp.config.vectorcode_server) 51 | logger.debug("Using vim.lsp.config.vectorcode_server for LSP config:\n", cfg) 52 | end 53 | cfg.name = "vectorcode_server" 54 | if setup_config.sync_log_env_var then 55 | local level = os.getenv("VECTORCODE_NVIM_LOG_LEVEL") or nil 56 | if level ~= nil then 57 | level = string.upper(level) 58 | if level == "TRACE" then 59 | -- there's no `TRACE` in python logging 60 | level = "DEBUG" 61 | end 62 | cfg.cmd_env["VECTORCODE_LOG_LEVEL"] = level 63 | end 64 | end 65 | return cfg 66 | end 67 | 68 | local notify_opts = { title = "VectorCode" } 69 | 70 | ---@param opts {notify:boolean}? 71 | local has_cli = function(opts) 72 | opts = opts or { notify = false } 73 | local ok = vim.fn.executable(setup_config.cli_cmds.vectorcode) == 1 74 | if not ok and opts.notify then 75 | vim.notify("VectorCode CLI is not executable!", vim.log.levels.ERROR, notify_opts) 76 | end 77 | return ok 78 | end 79 | 80 | ---@generic T: function 81 | ---@param func T 82 | ---@return T 83 | local check_cli_wrap = function(func) 84 | if not has_cli() then 85 | vim.notify("VectorCode CLI is not executable!", vim.log.levels.ERROR, notify_opts) 86 | end 87 | return func 88 | end 89 | 90 | --- Handles startup actions. 
91 | ---@param configs VectorCode.Opts 92 | local startup_handler = check_cli_wrap(function(configs) 93 | if configs.on_setup.update then 94 | require("vectorcode").check("config", function(out) 95 | if out.code == 0 then 96 | local path = string.gsub(out.stdout, "^%s*(.-)%s*$", "%1") 97 | if path ~= "" then 98 | logger.info("Running `vectorcode update` on start up.") 99 | require("vectorcode").update(path) 100 | end 101 | end 102 | end) 103 | end 104 | if configs.on_setup.lsp then 105 | local ok, runner = pcall(require, "vectorcode.jobrunner.lsp") 106 | if not ok or not type(runner) == "table" or runner == nil then 107 | vim.notify("Failed to start vectorcode-server.", vim.log.levels.WARN, notify_opts) 108 | logger.error("Failed to start vectorcode-server.") 109 | return 110 | end 111 | runner.init() 112 | end 113 | end) 114 | 115 | return { 116 | get_default_config = function() 117 | return vim.deepcopy(config, true) 118 | end, 119 | 120 | setup = check_cli_wrap( 121 | ---@param opts VectorCode.Opts? 122 | function(opts) 123 | logger.info("Received setup opts:\n", opts) 124 | opts = opts or {} 125 | setup_config = vim.tbl_deep_extend("force", config, opts or {}) 126 | for k, _ in pairs(setup_config.async_opts) do 127 | if 128 | setup_config[k] ~= nil 129 | and (opts.async_opts == nil or opts.async_opts[k] == nil) 130 | then 131 | -- NOTE: a lot of options are mutual between `setup_config` and `async_opts`. 132 | -- If users do not explicitly set them `async_opts`, copy them from `setup_config`. 133 | setup_config.async_opts = vim.tbl_deep_extend( 134 | "force", 135 | setup_config.async_opts, 136 | { [k] = setup_config[k] } 137 | ) 138 | end 139 | end 140 | setup_config.cli_cmds.vectorcode = 141 | vim.fs.normalize(setup_config.cli_cmds.vectorcode) 142 | startup_handler(setup_config) 143 | logger.info("Finished processing opts:\n", setup_config) 144 | end 145 | ), 146 | 147 | ---@return VectorCode.CacheBackend 148 | get_cacher_backend = function() 149 | if cacher ~= nil then 150 | return cacher 151 | end 152 | if setup_config.async_backend == "lsp" then 153 | local ok, lsp_cacher = pcall(require, "vectorcode.cacher.lsp") 154 | if ok and type(lsp_cacher) == "table" then 155 | logger.debug("Using LSP backend for cacher.") 156 | cacher = lsp_cacher 157 | return cacher 158 | else 159 | vim.notify("Falling back to default backend.", vim.log.levels.WARN, notify_opts) 160 | logger.warn("Fallback to default (cmd) backend for cacher.") 161 | setup_config.async_backend = "default" 162 | end 163 | end 164 | 165 | if setup_config.async_backend ~= "default" then 166 | vim.notify( 167 | ("Unrecognised vectorcode backend: %s! 
Falling back to `default`."):format( 168 | setup_config.async_backend 169 | ), 170 | vim.log.levels.ERROR, 171 | notify_opts 172 | ) 173 | logger.warn("Fallback to default (cmd) backend for cacher.") 174 | setup_config.async_backend = "default" 175 | end 176 | logger.debug("Defaulting to cmd backend for cacher.") 177 | cacher = require("vectorcode.cacher.default") 178 | return cacher 179 | end, 180 | 181 | ---@return VectorCode.Opts 182 | get_user_config = function() 183 | return vim.deepcopy(setup_config, true) 184 | end, 185 | ---@return VectorCode.QueryOpts 186 | get_query_opts = function() 187 | return { 188 | exclude_this = setup_config.exclude_this, 189 | n_query = setup_config.n_query, 190 | notify = setup_config.notify, 191 | timeout_ms = setup_config.timeout_ms, 192 | } 193 | end, 194 | notify_opts = notify_opts, 195 | 196 | ---@return boolean 197 | has_cli = has_cli, 198 | 199 | check_cli_wrap = check_cli_wrap, 200 | 201 | lsp_configs = lsp_configs, 202 | logger = logger, 203 | } 204 | -------------------------------------------------------------------------------- /tests/subcommands/test_ls.py: -------------------------------------------------------------------------------- 1 | import json 2 | import socket 3 | from unittest.mock import AsyncMock, MagicMock, patch 4 | 5 | import pytest 6 | import tabulate 7 | 8 | from vectorcode.cli_utils import Config 9 | from vectorcode.subcommands.ls import get_collection_list, ls 10 | 11 | 12 | @pytest.mark.asyncio 13 | async def test_get_collection_list(): 14 | mock_client = AsyncMock() 15 | mock_collection1 = AsyncMock() 16 | mock_collection1.name = "test_collection_1" 17 | mock_collection1.metadata = { 18 | "path": "/test/path1", 19 | "username": "test_user", 20 | "embedding_function": "test_ef", 21 | } 22 | mock_collection1.count.return_value = 100 23 | mock_collection1.get.return_value = { 24 | "metadatas": [ 25 | {"path": "/test/path1/file1.txt"}, 26 | {"path": "/test/path1/file2.txt"}, 27 | None, 28 | ] 29 | } 30 | mock_collection2 = AsyncMock() 31 | mock_collection2.name = "test_collection_2" 32 | mock_collection2.metadata = { 33 | "path": "/test/path2", 34 | "username": "test_user", 35 | "embedding_function": "test_ef", 36 | } 37 | mock_collection2.count.return_value = 200 38 | mock_collection2.get.return_value = { 39 | "metadatas": [ 40 | {"path": "/test/path2/file1.txt"}, 41 | {"path": "/test/path2/file2.txt"}, 42 | ] 43 | } 44 | 45 | async def mock_get_collections(client): 46 | yield mock_collection1 47 | yield mock_collection2 48 | 49 | with patch("vectorcode.subcommands.ls.get_collections", new=mock_get_collections): 50 | result = await get_collection_list(mock_client) 51 | 52 | assert len(result) == 2 53 | assert result[0]["project-root"] == "/test/path1" 54 | assert result[0]["user"] == "test_user" 55 | assert result[0]["hostname"] == socket.gethostname() 56 | assert result[0]["collection_name"] == "test_collection_1" 57 | assert result[0]["size"] == 100 58 | assert result[0]["embedding_function"] == "test_ef" 59 | assert result[0]["num_files"] == 2 60 | assert result[1]["num_files"] == 2 61 | 62 | 63 | @pytest.mark.asyncio 64 | async def test_ls_pipe_mode(capsys): 65 | mock_client = AsyncMock() 66 | mock_collection = AsyncMock() 67 | mock_collection.name = "test_collection" 68 | mock_collection.metadata = { 69 | "path": "/test/path", 70 | "username": "test_user", 71 | "embedding_function": "test_ef", 72 | } 73 | mock_collection.count.return_value = 50 74 | mock_collection.get.return_value = {"metadatas": [{"path": 
"/test/path/file.txt"}]} 75 | 76 | async def mock_get_collections(client): 77 | yield mock_collection 78 | 79 | with ( 80 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 81 | patch( 82 | "vectorcode.subcommands.ls.get_collection_list", 83 | return_value=[ 84 | { 85 | "project-root": "/test/path", 86 | "size": 50, 87 | "num_files": 1, 88 | "embedding_function": "test_ef", 89 | } 90 | ], 91 | ), 92 | ): 93 | mock_client = MagicMock() 94 | mock_client_manager = MockClientManager.return_value 95 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 96 | 97 | config = Config(pipe=True) 98 | await ls(config) 99 | captured = capsys.readouterr() 100 | expected_output = ( 101 | json.dumps( 102 | [ 103 | { 104 | "project-root": "/test/path", 105 | "size": 50, 106 | "num_files": 1, 107 | "embedding_function": "test_ef", 108 | } 109 | ] 110 | ) 111 | + "\n" 112 | ) 113 | assert captured.out == expected_output 114 | 115 | 116 | @pytest.mark.asyncio 117 | async def test_ls_table_mode(capsys, monkeypatch): 118 | mock_client = AsyncMock() 119 | mock_collection = AsyncMock() 120 | mock_collection.name = "test_collection" 121 | mock_collection.metadata = { 122 | "path": "/test/path", 123 | "username": "test_user", 124 | "embedding_function": "test_ef", 125 | } 126 | mock_collection.count.return_value = 50 127 | mock_collection.get.return_value = {"metadatas": [{"path": "/test/path/file.txt"}]} 128 | 129 | async def mock_get_collections(client): 130 | yield mock_collection 131 | 132 | with ( 133 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 134 | patch( 135 | "vectorcode.subcommands.ls.get_collection_list", 136 | return_value=[ 137 | { 138 | "project-root": "/test/path", 139 | "size": 50, 140 | "num_files": 1, 141 | "embedding_function": "test_ef", 142 | } 143 | ], 144 | ), 145 | ): 146 | mock_client = MagicMock() 147 | mock_client_manager = MockClientManager.return_value 148 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 149 | 150 | config = Config(pipe=False) 151 | await ls(config) 152 | captured = capsys.readouterr() 153 | expected_output = ( 154 | tabulate.tabulate( 155 | [["/test/path", 50, 1, "test_ef"]], 156 | headers=[ 157 | "Project Root", 158 | "Collection Size", 159 | "Number of Files", 160 | "Embedding Function", 161 | ], 162 | ) 163 | + "\n" 164 | ) 165 | assert captured.out == expected_output 166 | 167 | # Test with HOME environment variable set 168 | monkeypatch.setenv("HOME", "/test") 169 | with ( 170 | patch("vectorcode.subcommands.ls.ClientManager") as MockClientManager, 171 | patch( 172 | "vectorcode.subcommands.ls.get_collection_list", 173 | return_value=[ 174 | { 175 | "project-root": "/test/path", 176 | "size": 50, 177 | "num_files": 1, 178 | "embedding_function": "test_ef", 179 | } 180 | ], 181 | ), 182 | ): 183 | mock_client = MagicMock() 184 | mock_client_manager = MockClientManager.return_value 185 | mock_client_manager._create_client = AsyncMock(return_value=mock_client) 186 | config = Config(pipe=False) 187 | await ls(config) 188 | captured = capsys.readouterr() 189 | expected_output = ( 190 | tabulate.tabulate( 191 | [["~/path", 50, 1, "test_ef"]], 192 | headers=[ 193 | "Project Root", 194 | "Collection Size", 195 | "Number of Files", 196 | "Embedding Function", 197 | ], 198 | ) 199 | + "\n" 200 | ) 201 | assert captured.out == expected_output 202 | -------------------------------------------------------------------------------- /lua/codecompanion/_extensions/vectorcode/init.lua: 
-------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | ---@alias sub_cmd "ls"|"query"|"vectorise"|"files_ls"|"files_rm" 4 | 5 | ---@class VectorCode.CodeCompanion.ExtensionOpts 6 | ---A table where the keys are the subcommand name (`ls`, `query`, `vectorise`, etc.) 7 | --- and the values are their config options. 8 | ---@field tool_opts? table 9 | ---Options related to the `vectorcode_toolbox` tool group 10 | ---@field tool_group? VectorCode.CodeCompanion.ToolGroupOpts 11 | ---Prompt library that automatically creates VectorCode collections on local files 12 | ---and set up prompts to let LLM search from certain directories. 13 | --- 14 | ---The keys should be the human-readable name of the prompt (as they'd appear in 15 | ---the action menu), and values would be `VectorCode.CodeCompanion.PromptFactory.Opts` 16 | ---objects. 17 | ---@field prompt_library? table 18 | 19 | local vc_config = require("vectorcode.config") 20 | local logger = vc_config.logger 21 | local utils = require("vectorcode.utils") 22 | 23 | ---@type VectorCode.CodeCompanion.ExtensionOpts|{} 24 | local default_extension_opts = { 25 | ---@type table 26 | tool_opts = { 27 | -- NOTE: the other default opts are defined in the source code files of the tools. 28 | -- `include_in_toolbox` is here so that the extension setup works as expected. 29 | ls = { include_in_toolbox = true }, 30 | query = { include_in_toolbox = true }, 31 | vectorise = { 32 | requires_approval = true, 33 | require_approval_before = true, 34 | include_in_toolbox = true, 35 | }, 36 | files_ls = {}, 37 | files_rm = { require_approval_before = true, requires_approval = true }, 38 | }, 39 | tool_group = { enabled = true, collapse = true, extras = {} }, 40 | prompt_library = require("vectorcode.integrations.codecompanion.prompts.presets"), 41 | } 42 | 43 | ---@type sub_cmd[] 44 | local valid_tools = { "ls", "query", "vectorise", "files_ls", "files_rm" } 45 | 46 | ---@param tool_opts table 47 | ---@return table 48 | local function merge_tool_opts(tool_opts) 49 | local wildcard_opts = tool_opts["*"] 50 | if wildcard_opts then 51 | for tool_name, opts in pairs(tool_opts) do 52 | if tool_name ~= "*" then 53 | tool_opts[tool_name] = vim.tbl_deep_extend("force", wildcard_opts, opts) 54 | end 55 | end 56 | tool_opts["*"] = nil 57 | end 58 | ---@cast tool_opts table 59 | return tool_opts 60 | end 61 | 62 | ---@type CodeCompanion.Extension 63 | local M = { 64 | ---@param opts VectorCode.CodeCompanion.ExtensionOpts 65 | setup = vc_config.check_cli_wrap(function(opts) 66 | if 67 | opts 68 | and opts.tool_opts 69 | and vim.iter(opts.tool_opts):any(function(_, v) 70 | return v.requires_approval ~= nil 71 | end) 72 | then 73 | vim.deprecate( 74 | "requires_approval", 75 | "require_approval_before", 76 | "1.0.0", 77 | "VectorCode", 78 | false 79 | ) 80 | end 81 | opts = vim.tbl_deep_extend("force", default_extension_opts, opts or {}) 82 | opts.tool_opts = merge_tool_opts(opts.tool_opts) 83 | logger.info("Received codecompanion extension opts:\n", opts) 84 | local cc_config = require("codecompanion.config").config 85 | local cc_integration = require("vectorcode.integrations").codecompanion 86 | local cc_chat_integration = cc_integration.chat 87 | 88 | local interactions = cc_config.strategies or cc_config.interactions 89 | for _, sub_cmd in pairs(valid_tools) do 90 | local tool_name = string.format("vectorcode_%s", sub_cmd) 91 | if interactions.chat.tools[tool_name] ~= nil then 92 | vim.notify( 93 | 
string.format( 94 | "There's an existing tool named `%s`. Please either remove it or rename it.", 95 | tool_name 96 | ), 97 | vim.log.levels.ERROR, 98 | vc_config.notify_opts 99 | ) 100 | logger.warn( 101 | string.format( 102 | "Not creating this tool because there is an existing tool named %s.", 103 | tool_name 104 | ) 105 | ) 106 | else 107 | local require_approval = opts.tool_opts[sub_cmd].requires_approval 108 | or opts.tool_opts[sub_cmd].require_approval_before 109 | 110 | interactions.chat.tools[tool_name] = { 111 | description = string.format("Run VectorCode %s tool", sub_cmd), 112 | callback = cc_chat_integration.make_tool(sub_cmd, opts.tool_opts[sub_cmd]), 113 | opts = { 114 | requires_approval = require_approval, 115 | require_approval_before = require_approval, 116 | }, 117 | } 118 | logger.info(string.format("%s tool has been created.", tool_name)) 119 | end 120 | end 121 | 122 | if opts.tool_group.enabled then 123 | local included_tools = vim 124 | .iter(valid_tools) 125 | :filter(function(cmd_name) 126 | return opts.tool_opts[cmd_name].include_in_toolbox 127 | end) 128 | :map(function(s) 129 | return "vectorcode_" .. s 130 | end) 131 | :totable() 132 | if opts.tool_group.extras and not vim.tbl_isempty(opts.tool_group.extras) then 133 | vim.list_extend(included_tools, opts.tool_group.extras) 134 | end 135 | logger.info( 136 | string.format( 137 | "Loading the following tools into `vectorcode_toolbox` tool group:\n%s", 138 | vim.inspect(included_tools) 139 | ) 140 | ) 141 | interactions.chat.tools.groups["vectorcode_toolbox"] = { 142 | opts = { collapse_tools = opts.tool_group.collapse }, 143 | description = "Use VectorCode to automatically build and retrieve repository-level context.", 144 | tools = included_tools, 145 | } 146 | end 147 | 148 | for name, prompt_opts in pairs(opts.prompt_library) do 149 | if prompt_opts.name ~= nil and prompt_opts.name ~= name then 150 | vim.notify( 151 | string.format( 152 | "The name of `%s` is inconsistent in the opts (`%s`).\nRenaming to `%s`.", 153 | name, 154 | prompt_opts.name, 155 | name 156 | ), 157 | vim.log.levels.WARN, 158 | vc_config.notify_opts 159 | ) 160 | end 161 | local project_root = prompt_opts.project_root 162 | if type(project_root) == "function" then 163 | project_root = project_root() 164 | end 165 | if not utils.is_directory(project_root) then 166 | vim.notify( 167 | string.format( 168 | "`%s` is not a valid directory for CodeCompanion prompt library.\nSkipping `%s`.", 169 | project_root, 170 | name 171 | ), 172 | vim.log.levels.WARN, 173 | vc_config.notify_opts 174 | ) 175 | else 176 | prompt_opts.name = name 177 | cc_config.prompt_library[name] = 178 | cc_chat_integration.prompts.register_prompt(prompt_opts) 179 | end 180 | end 181 | end), 182 | } 183 | 184 | return M 185 | -------------------------------------------------------------------------------- /lua/vectorcode/types.lua: -------------------------------------------------------------------------------- 1 | ---@module "codecompanion" 2 | 3 | ---Type definition of the retrieval result. 4 | ---@class VectorCode.QueryResult 5 | ---@field path string Path to the file 6 | ---@field document string? Content of the file 7 | ---@field chunk string? 8 | ---@field start_line integer? 9 | ---@field end_line integer? 10 | ---@field chunk_id string? 11 | ---@field summary string? Used by the CodeCompanion tool only. 
Not part of the backend response 12 | 13 | ---@class VectorCode.LsResult 14 | ---@field project-root string 15 | 16 | ---@class VectorCode.VectoriseResult 17 | ---@field add integer 18 | ---@field update integer 19 | ---@field removed integer 20 | ---@field skipped integer 21 | ---@field failed integer 22 | 23 | ---Type definitions for the cache of a buffer. 24 | ---@class VectorCode.Cache 25 | ---@field enabled boolean Whether the async jobs are enabled or not. If the buffer is disabled, no cache will be generated for it. 26 | ---@field job_count integer 27 | ---@field jobs table Job handle:time of creation (in seconds) 28 | ---@field last_run integer? Last time the query ran, in seconds from epoch. 29 | ---@field options VectorCode.RegisterOpts The options that the buffer was registered with. 30 | ---@field retrieval VectorCode.QueryResult[]? The latest retrieval. 31 | 32 | ---Type definitions for options accepted by `query` API. 33 | ---@class VectorCode.QueryOpts 34 | ---@field exclude_this boolean? Whether to exclude the current buffer. Default: true 35 | ---@field n_query integer? Number of results. 36 | ---@field notify boolean? Notify on new results and other key moments. 37 | ---@field timeout_ms number? Timeout (in milliseconds) for running a vectorcode command. Default: 5000 38 | 39 | ---@class VectorCode.OnSetup Some actions that may be configured to run when `setup` is called. 40 | ---@field update boolean `vectorcode update` 41 | ---@field lsp boolean whether to start LSP server on startup (default is to delay it to the first LSP request) 42 | 43 | ---@class VectorCode.CliCmds Cli commands to use 44 | ---@field vectorcode string vectorcode cli command or full path 45 | 46 | ---Options passed to `setup`. 47 | ---@class VectorCode.Opts : VectorCode.QueryOpts 48 | ---@field async_opts VectorCode.RegisterOpts Default options to use for registering a new buffer for async cache. 49 | ---@field cli_cmds VectorCode.CliCmds 50 | ---@field on_setup VectorCode.OnSetup 51 | ---@field async_backend "default"|"lsp" 52 | ---@field sync_log_env_var boolean Whether to automatically set `VECTORCODE_LOG_LEVEL` when `VECTORCODE_NVIM_LOG_LEVEL` is detected. !! WARNING: THIS MAY RESULT IN EXCESSIVE LOG MESSAGES DUE TO STDERR BEING POPULATED BY CLI LOGS 53 | 54 | ---Options for the registration of an async cache for a buffer. 55 | ---@class VectorCode.RegisterOpts: VectorCode.QueryOpts 56 | ---@field debounce? integer Seconds. Default: 10 57 | ---@field events? string|string[] autocmd events that triggers async jobs. Default: `{"BufWritePost", "InsertEnter", "BufReadPost"}` 58 | ---@field single_job? boolean Whether to restrict to 1 async job per buffer. Default: false 59 | ---@field query_cb? VectorCode.QueryCallback Function that accepts the buffer ID and returns the query message(s). Default: `require("vectorcode.utils").make_surrounding_lines_cb(-1)` 60 | ---@field run_on_register? boolean Whether to run the query when registering. Default: false 61 | ---@field project_root? string 62 | 63 | ---A unified interface used by `lsp` backend and `default` backend 64 | ---@class VectorCode.CacheBackend 65 | ---@field register_buffer fun(bufnr: integer?, opts: VectorCode.RegisterOpts) Register a buffer and create an async cache for it. 66 | ---@field deregister_buffer fun(bufnr: integer?, opts: {notify: boolean}?) Deregister a buffer and destroy its async cache. 67 | ---@field query_from_cache fun(bufnr: integer?, opts: {notify: boolean}?): VectorCode.QueryResult[] Get the cached documents. 
68 | ---@field buf_is_registered fun(bufnr: integer?): boolean Checks if a buffer has been registered. 69 | ---@field buf_job_count fun(bufnr: integer?): integer Returns the number of running jobs in the background. 70 | ---@field buf_is_enabled fun(bufnr: integer?): boolean Checks if a buffer has been enabled. 71 | ---@field make_prompt_component fun(bufnr: integer?, component_cb: (fun(result: VectorCode.QueryResult): string)?): {content: string, count: integer} Compile the retrieval results into a string. 72 | ---@field async_check fun(check_item: string?, on_success: fun(out: vim.SystemCompleted)?, on_failure: fun(out: vim.SystemCompleted)?) Checks if VectorCode has been configured properly for your project. 73 | 74 | --- This class defines the options available to the CodeCompanion tool. 75 | ---@class VectorCode.CodeCompanion.ToolOpts 76 | --- Whether to use the LSP backend. Default: `false` 77 | ---@field use_lsp boolean? 78 | ---@field requires_approval boolean? 79 | ---@field require_approval_before boolean? 80 | --- Whether this tool should be included in `vectorcode_toolbox` 81 | ---@field include_in_toolbox boolean? 82 | 83 | ---@class VectorCode.CodeCompanion.LsToolOpts: VectorCode.CodeCompanion.ToolOpts 84 | 85 | ---@class VectorCode.CodeCompanion.FilesLsToolOpts: VectorCode.CodeCompanion.ToolOpts 86 | 87 | ---@class VectorCode.CodeCompanion.FilesRmToolOpts: VectorCode.CodeCompanion.ToolOpts 88 | 89 | ---@class VectorCode.CodeCompanion.QueryToolOpts: VectorCode.CodeCompanion.ToolOpts 90 | --- Maximum number of results provided to the LLM. 91 | --- You may set this to a table to configure different values for document/chunk mode. 92 | --- When set to negative values, it means unlimited. 93 | --- Default: `{ document = -1, chunk = -1 }` 94 | ---@field max_num integer|{document:integer, chunk: integer}|nil 95 | --- Default number of results provided to the LLM. 96 | --- This value is written in the system prompt and tool description. 97 | --- Users may ask the LLM to request a different number of results in the chat. 98 | --- You may set this to a table to configure different values for document/chunk mode. 99 | --- Default: `{ document = 10, chunk = 50 }` 100 | ---@field default_num? integer|{document:integer, chunk: integer} 101 | --- Whether to avoid duplicated references. Default: `true` 102 | ---@field no_duplicate boolean? 103 | --- Whether to send chunks instead of full files to the LLM. Default: `false` 104 | --- > Make sure you adjust `max_num` and `default_num` accordingly. 105 | ---@field chunk_mode? boolean 106 | ---@field summarise? VectorCode.CodeCompanion.SummariseOpts 107 | 108 | ---@class VectorCode.CodeCompanion.VectoriseToolOpts: VectorCode.CodeCompanion.ToolOpts 109 | 110 | ---@class VectorCode.CodeCompanion.ToolGroupOpts 111 | ---Whether to register the tool group 112 | ---@field enabled? boolean 113 | ---Whether to show the individual tools in the references 114 | ---@field collapse? boolean 115 | ---Other tools that you'd like to include in `vectorcode_toolbox` 116 | ---@field extras? string[] 117 | 118 | --- The result of the query tool should be structured in the following table 119 | ---@class VectorCode.CodeCompanion.QueryToolResult 120 | ---@field raw_results VectorCode.QueryResult[] 121 | ---@field count integer 122 | ---@field summary? string 123 | 124 | ---@class VectorCode.CodeCompanion.SummariseOpts 125 | ---A boolean flag that controls whether summarisation should be enabled. 126 | ---This can also be a function that returns a boolean. 
127 | ---In this case, you can use this option to dynamically control whether summarisation is enabled during a chat. 128 | --- 129 | ---This function receives 2 parameters: 130 | --- - `CodeCompanion.Chat`: the chat object; 131 | --- - `VectorCode.QueryResult[]`: a list of query results. 132 | ---@field enabled? boolean|(fun(chat: CodeCompanion.Chat, results: VectorCode.QueryResult[]):boolean) 133 | ---The adapter used for the summarisation task. When set to `nil`, the adapter from the current chat will be used. 134 | ---@field adapter? string|CodeCompanion.HTTPAdapter|fun():CodeCompanion.HTTPAdapter 135 | ---The system prompt sent to the summariser model. 136 | ---When set to a function, it'll receive the default system prompt as the only parameter, 137 | ---and should return the new (full) system prompt. This allows you to customise or rewrite the system prompt. 138 | ---@field system_prompt? string|(fun(original_prompt: string): string) 139 | ---When set to true, include the query messages so that the LLM may make task-related summarisations. 140 | ---This happens __after__ the `system_prompt` callback processing. 141 | ---@field query_augmented? boolean 142 | -------------------------------------------------------------------------------- /lua/vectorcode/init.lua: -------------------------------------------------------------------------------- 1 | local M = {} 2 | 3 | local vc_config = require("vectorcode.config") 4 | local utils = require("vectorcode.utils") 5 | local logger = vc_config.logger 6 | local get_config = vc_config.get_user_config 7 | local notify_opts = vc_config.notify_opts 8 | local jobrunner = require("vectorcode.jobrunner.cmd") 9 | local notify = vim.schedule_wrap(vim.notify) 10 | 11 | M.query = vc_config.check_cli_wrap( 12 | ---This function wraps the `query` subcommand of the VectorCode CLI. When used without the `callback` parameter, 13 | ---this function works as a synchronous function and returns the results. Otherwise, this function will run asynchronously 14 | ---and the results are accessible through the `callback` function (the results will be passed as the argument to the 15 | ---callback). 16 | ---@param query_message string|string[] Query message(s) to send to the `vectorcode query` command 17 | ---@param opts VectorCode.QueryOpts? A table of config options. If nil, the default config or `setup` config will be used. 18 | ---@param callback fun(result:VectorCode.QueryResult[])? Callback that receives the results when running asynchronously. 19 | ---@return VectorCode.QueryResult[]? An array of results. 20 | function(query_message, opts, callback) 21 | logger.info("vectorcode.query: ", query_message, opts, callback) 22 | opts = vim.tbl_deep_extend("force", vc_config.get_query_opts(), opts or {}) 23 | if opts.n_query == 0 then 24 | if opts.notify then 25 | vim.notify("n_query is 0. Not sending queries.") 26 | end 27 | return {} 28 | end 29 | 30 | ---@type integer?
31 | local timeout_ms = opts.timeout_ms 32 | if timeout_ms < 1 then 33 | timeout_ms = nil 34 | end 35 | if opts.notify then 36 | vim.notify( 37 | ("Started retrieving %s documents."):format(tostring(opts.n_query)), 38 | vim.log.levels.INFO, 39 | notify_opts 40 | ) 41 | end 42 | local bufnr = vim.api.nvim_get_current_buf() 43 | local args = { "query", "--pipe", "-n", tostring(opts.n_query) } 44 | if type(query_message) == "string" then 45 | query_message = { query_message } 46 | end 47 | vim.list_extend(args, query_message) 48 | 49 | if opts.exclude_this then 50 | vim.list_extend(args, { "--exclude", vim.api.nvim_buf_get_name(bufnr) }) 51 | end 52 | 53 | logger.debug("vectorcode.query cmd args: ", args) 54 | if callback == nil then 55 | local result, err = jobrunner.run(args, timeout_ms, bufnr) 56 | if err then 57 | logger.warn(vim.inspect(err)) 58 | end 59 | logger.debug(result) 60 | return result 61 | else 62 | jobrunner.run_async(args, function(result, error) 63 | logger.debug(result) 64 | callback(result or {}) 65 | if error then 66 | logger.warn(vim.inspect(error)) 67 | end 68 | end, bufnr) 69 | end 70 | end 71 | ) 72 | 73 | M.vectorise = vc_config.check_cli_wrap( 74 | ---This function wraps the `vectorise` subcommand. By default this function doesn't pass a `--project_root` flag. 75 | ---The command will be run from the current working directory, and the normal project root detection logic in the 76 | ---CLI will work as normal. You may also pass a `project_root` as the second argument, in which case the 77 | ---`--project_root` will be passed. 78 | ---@param files string|string[] Files to vectorise. 79 | ---@param project_root string? Add the `--project_root` flag and the passed argument to the command. 80 | function(files, project_root) 81 | logger.info("vectorcode.vectorise: ", files, project_root) 82 | local args = { "--pipe", "vectorise" } 83 | if 84 | project_root ~= nil 85 | or ( 86 | M.check("config", function(obj) 87 | if obj.code == 0 then 88 | project_root = obj.stdout 89 | end 90 | end) 91 | ) 92 | then 93 | vim.list_extend(args, { "--project_root", project_root }) 94 | end 95 | if type(files) == "string" then 96 | files = { files } 97 | end 98 | local valid_files = {} 99 | for k, v in pairs(files) do 100 | if vim.fn.filereadable(v) == 1 then 101 | vim.list_extend(valid_files, { files[k] }) 102 | end 103 | end 104 | if #valid_files > 0 then 105 | vim.list_extend(args, valid_files) 106 | else 107 | return 108 | end 109 | if get_config().notify then 110 | vim.schedule(function() 111 | vim.notify( 112 | ("Vectorising %s"):format(table.concat(files, ", ")), 113 | vim.log.levels.INFO, 114 | notify_opts 115 | ) 116 | end) 117 | end 118 | local bufnr = vim.api.nvim_get_current_buf() 119 | logger.debug("vectorcode.vectorise cmd args: ", args) 120 | jobrunner.run_async(args, function(result, error) 121 | if result then 122 | if vc_config.get_user_config().notify then 123 | vim.schedule_wrap(vim.notify)( 124 | "Indexing successful.", 125 | vim.log.levels.INFO, 126 | notify_opts 127 | ) 128 | end 129 | logger.info("Vectorise result:", vim.inspect(result)) 130 | elseif error then 131 | vim.schedule_wrap(vim.notify)( 132 | string.format("Indexing failed:\n%s", vim.inspect(error)), 133 | vim.log.levels.WARN, 134 | notify_opts 135 | ) 136 | logger.warn(vim.inspect(error)) 137 | else 138 | vim.schedule_wrap(vim.notify)( 139 | "Indexing failed.", 140 | vim.log.levels.WARN, 141 | notify_opts 142 | ) 143 | end 144 | end, bufnr) 145 | end 146 | ) 147 | 148 | ---@param project_root string? 
149 | M.update = vc_config.check_cli_wrap(function(project_root) 150 | logger.info("vectorcode.update: ", project_root) 151 | local args = { "update" } 152 | if project_root ~= nil and utils.is_directory(project_root) then 153 | vim.list_extend(args, { "--project_root", project_root }) 154 | end 155 | logger.debug("vectorcode.update cmd args: ", args) 156 | jobrunner.run_async(args, function(result, error) 157 | if result then 158 | if vc_config.get_user_config().notify then 159 | notify("Indexing successful.", vim.log.levels.INFO, notify_opts) 160 | end 161 | logger.info("Update result:", vim.inspect(result)) 162 | elseif error then 163 | notify( 164 | string.format("Update failed:\n%s", vim.inspect(error)), 165 | vim.log.levels.WARN, 166 | notify_opts 167 | ) 168 | logger.warn(vim.inspect(error)) 169 | else 170 | notify("Update failed.", vim.log.levels.WARN, notify_opts) 171 | end 172 | end, vim.api.nvim_get_current_buf()) 173 | 174 | if get_config().notify then 175 | notify("Updating VectorCode embeddings...", vim.log.levels.INFO, notify_opts) 176 | end 177 | end) 178 | 179 | ---@param check_item string? See `vectorcode check` documentation. 180 | ---@param stdout_cb fun(stdout: vim.SystemCompleted)? Gives user access to the exit code, stdout and signal. 181 | ---@return boolean 182 | function M.check(check_item, stdout_cb) 183 | if not vc_config.has_cli() then 184 | return false 185 | end 186 | check_item = check_item or "config" 187 | local return_code 188 | jobrunner.run_async({ "check", check_item }, function(result, error, code, signal) 189 | return_code = code 190 | if type(stdout_cb) == "function" then 191 | stdout_cb({ 192 | stdout = utils.flatten_table_to_string(result), 193 | stderr = utils.flatten_table_to_string(error, "Unknown error."), 194 | code = code, 195 | signal = signal, 196 | }) 197 | end 198 | end, 0) 199 | return return_code == 0 200 | end 201 | 202 | ---@alias prompt_type "ls"|"query"|"vectorise" 203 | ---@param item prompt_type|prompt_type[]|nil 204 | ---@return string[] 205 | M.prompts = vc_config.check_cli_wrap(function(item) 206 | local args = { "prompts", "-p" } 207 | if item then 208 | if type(item) == "string" then 209 | table.insert(args, item) 210 | else 211 | vim.list_extend(args, item) 212 | end 213 | end 214 | local result, error = jobrunner.run(args, -1, 0) 215 | if result == nil or vim.tbl_isempty(result) then 216 | logger.warn(vim.inspect(error)) 217 | if vc_config.get_user_config().notify then 218 | notify(vim.inspect(error)) 219 | end 220 | return {} 221 | end 222 | return vim.iter(result):flatten(math.huge):totable() 223 | end) 224 | 225 | M.setup = vc_config.setup 226 | return M 227 | -------------------------------------------------------------------------------- /tests/subcommands/query/test_reranker.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | from unittest.mock import MagicMock, patch 3 | 4 | import numpy 5 | import pytest 6 | 7 | from vectorcode.cli_utils import Config, QueryInclude 8 | from vectorcode.subcommands.query.reranker import ( 9 | CrossEncoderReranker, 10 | NaiveReranker, 11 | RerankerBase, 12 | __supported_rerankers, 13 | add_reranker, 14 | get_available_rerankers, 15 | get_reranker, 16 | ) 17 | from vectorcode.subcommands.query.types import QueryResult 18 | 19 | 20 | @pytest.fixture(scope="function") 21 | def config(): 22 | return Config( 23 | n_result=3, 24 | reranker_params={ 25 | "model_name_or_path": "cross-encoder/ms-marco-MiniLM-L-6-v2", 26 | 
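            # these parameters are passed through to sentence_transformers.CrossEncoder
            # when the reranker is constructed (see test_cross_encoder_reranker_initialization below)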
"device": "cpu", 27 | }, 28 | reranker="CrossEncoderReranker", 29 | query=["query chunk 1", "query chunk 2"], 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="function") 34 | def naive_reranker_conf(): 35 | return Config( 36 | n_result=3, reranker="NaiveReranker", query=["query chunk 1", "query chunk 2"] 37 | ) 38 | 39 | 40 | @pytest.fixture(scope="function") 41 | def query_result() -> list[QueryResult]: 42 | return [ 43 | QueryResult( 44 | path="file1.py", 45 | chunk=MagicMock(), 46 | query=("query chunk 1",), 47 | scores=(0.5,), 48 | ), 49 | QueryResult( 50 | path="file2.py", 51 | chunk=MagicMock(), 52 | query=("query chunk 1",), 53 | scores=(0.9,), 54 | ), 55 | QueryResult( 56 | path="file3.py", 57 | chunk=MagicMock(), 58 | query=("query chunk 1",), 59 | scores=(0.3,), 60 | ), 61 | QueryResult( 62 | path="file2.py", 63 | chunk=MagicMock(), 64 | query=("query chunk 2",), 65 | scores=(0.6,), 66 | ), 67 | QueryResult( 68 | path="file4.py", 69 | chunk=MagicMock(), 70 | query=("query chunk 2",), 71 | scores=(0.7,), 72 | ), 73 | QueryResult( 74 | path="file3.py", 75 | chunk=MagicMock(), 76 | query=("query chunk 2",), 77 | scores=(0.2,), 78 | ), 79 | ] 80 | 81 | 82 | @pytest.fixture(scope="function") 83 | def empty_query_result(): 84 | return [] 85 | 86 | 87 | @pytest.fixture(scope="function") 88 | def query_chunks(): 89 | return ["query chunk 1", "query chunk 2"] 90 | 91 | 92 | def test_reranker_base_method_is_abstract(config): 93 | with pytest.raises((NotImplementedError, TypeError)): 94 | RerankerBase(config) 95 | 96 | 97 | def test_naive_reranker_initialization(naive_reranker_conf): 98 | """Test initialization of NaiveReranker""" 99 | reranker = NaiveReranker(naive_reranker_conf) 100 | assert reranker.n_result == 3 101 | 102 | 103 | def test_reranker_create(naive_reranker_conf): 104 | reranker = NaiveReranker.create(naive_reranker_conf) 105 | assert isinstance(reranker, NaiveReranker) 106 | 107 | 108 | def test_reranker_create_fail(): 109 | class TestReranker(RerankerBase): 110 | def __init__(self, configs, **kwargs): 111 | raise Exception 112 | 113 | with pytest.raises(Exception): 114 | TestReranker.create(Config()) 115 | 116 | 117 | @pytest.mark.asyncio 118 | async def test_naive_reranker_rerank(naive_reranker_conf, query_result): 119 | """Test basic reranking functionality of NaiveReranker""" 120 | reranker = NaiveReranker(naive_reranker_conf) 121 | result = await reranker.rerank(query_result) 122 | 123 | # Check the result is a list of paths with correct length 124 | assert isinstance(result, list) 125 | assert len(result) <= naive_reranker_conf.n_result 126 | 127 | # Check all returned items are strings (paths) 128 | for res in result: 129 | assert isinstance(res, str) 130 | 131 | 132 | @pytest.mark.asyncio 133 | async def test_naive_reranker_rerank_chunks(naive_reranker_conf, query_result): 134 | """Test basic reranking functionality of NaiveReranker""" 135 | naive_reranker_conf.include = [QueryInclude.chunk] 136 | reranker = NaiveReranker(naive_reranker_conf) 137 | chunks = {i.chunk for i in query_result} 138 | result = await reranker.rerank(query_result) 139 | 140 | # Check the result is a list of paths with correct length 141 | assert isinstance(result, list) 142 | assert len(result) <= naive_reranker_conf.n_result 143 | 144 | for res in result: 145 | assert res in chunks 146 | 147 | 148 | @pytest.mark.asyncio 149 | async def test_naive_reranker_rerank_empty_result( 150 | naive_reranker_conf, empty_query_result 151 | ): 152 | reranker = NaiveReranker(naive_reranker_conf) 153 | result 
= await reranker.rerank(empty_query_result) 154 | assert len(result) == 0 155 | 156 | 157 | @patch("sentence_transformers.CrossEncoder") 158 | def test_cross_encoder_reranker_initialization(mock_cross_encoder: MagicMock, config): 159 | model_name = config.reranker_params["model_name_or_path"] 160 | reranker = CrossEncoderReranker(config) 161 | # Verify constructor was called with correct parameters 162 | mock_cross_encoder.assert_called_once_with(model_name, **config.reranker_params) 163 | assert reranker.n_result == config.n_result 164 | 165 | 166 | @patch("sentence_transformers.CrossEncoder") 167 | def test_cross_encoder_reranker_initialization_fallback_model_name( 168 | mock_cross_encoder: MagicMock, config 169 | ): 170 | config.reranker_params = {} 171 | reranker = CrossEncoderReranker(config) 172 | 173 | # Verify constructor was called with correct parameters 174 | mock_cross_encoder.assert_called_once_with("cross-encoder/ms-marco-MiniLM-L-6-v2") 175 | assert reranker.n_result == config.n_result 176 | 177 | 178 | @pytest.mark.asyncio 179 | @patch("sentence_transformers.CrossEncoder") 180 | async def test_cross_encoder_reranker_rerank(mock_cross_encoder, config, query_result): 181 | mock_model = MagicMock() 182 | mock_cross_encoder.return_value = mock_model 183 | 184 | mock_model.predict = lambda x: numpy.random.random((len(x),)) 185 | 186 | reranker = CrossEncoderReranker(config) 187 | result = await reranker.rerank(query_result) 188 | 189 | # Result assertions 190 | assert isinstance(result, list) 191 | assert all(isinstance(path, str) for path in result) 192 | assert len(result) <= config.n_result 193 | 194 | 195 | @pytest.mark.asyncio 196 | async def test_naive_reranker_document_selection_logic( 197 | naive_reranker_conf, query_result 198 | ): 199 | """Test that NaiveReranker correctly selects documents based on distances""" 200 | # Create a query result with known distances 201 | 202 | reranker = NaiveReranker(naive_reranker_conf) 203 | result = await reranker.rerank(query_result) 204 | 205 | # Check that files are included (exact order depends on implementation details) 206 | assert len(result) > 0 207 | # Common files should be present 208 | assert "file2.py" in result or "file3.py" in result 209 | 210 | 211 | def test_get_reranker(config, naive_reranker_conf): 212 | assert get_reranker(naive_reranker_conf).configs.reranker == "NaiveReranker" 213 | 214 | reranker = get_reranker(config) 215 | assert reranker.configs.reranker == "CrossEncoderReranker" 216 | 217 | reranker = cast(CrossEncoderReranker, get_reranker(config)) 218 | assert reranker.configs.reranker == "CrossEncoderReranker", ( 219 | "configs.reranker should fallback to 'CrossEncoderReranker'" 220 | ) 221 | 222 | 223 | def test_supported_rerankers_initialization(config, naive_reranker_conf): 224 | """Test that __supported_rerankers contains the expected default rerankers""" 225 | 226 | assert isinstance(get_reranker(config), CrossEncoderReranker) 227 | assert isinstance(get_reranker(naive_reranker_conf), NaiveReranker) 228 | assert len(get_available_rerankers()) == 2 229 | 230 | 231 | def test_add_reranker_success(): 232 | """Test successful registration of a new reranker""" 233 | 234 | original_count = len(get_available_rerankers()) 235 | 236 | @add_reranker 237 | class TestReranker(RerankerBase): 238 | async def compute_similarity(self, results, query_message): 239 | return [] 240 | 241 | assert len(get_available_rerankers()) == original_count + 1 242 | assert "TestReranker" in __supported_rerankers 243 | assert 
isinstance( 244 | get_reranker(Config(reranker="TestReranker", query=["hello world"])), 245 | TestReranker, 246 | ) 247 | __supported_rerankers.pop("TestReranker") 248 | 249 | 250 | def test_add_reranker_duplicate(): 251 | """Test duplicate reranker registration raises error""" 252 | 253 | # First registration should succeed 254 | @add_reranker 255 | class TestReranker(RerankerBase): 256 | async def compute_similarity(self, results, query_message): 257 | return [] 258 | 259 | # Second registration should fail 260 | with pytest.raises(AttributeError): 261 | add_reranker(TestReranker) 262 | __supported_rerankers.pop("TestReranker") 263 | 264 | 265 | def test_add_reranker_invalid_baseclass(): 266 | """Test that non-RerankerBase classes can't be registered""" 267 | 268 | with pytest.raises(TypeError): 269 | 270 | @add_reranker 271 | class InvalidReranker: 272 | pass 273 | -------------------------------------------------------------------------------- /src/vectorcode/subcommands/query/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, cast 5 | 6 | from chromadb import Where 7 | from chromadb.api.models.AsyncCollection import AsyncCollection 8 | from chromadb.api.types import IncludeEnum, QueryResult 9 | from chromadb.errors import InvalidCollectionException, InvalidDimensionException 10 | from tree_sitter import Point 11 | 12 | from vectorcode.chunking import Chunk, StringChunker 13 | from vectorcode.cli_utils import ( 14 | Config, 15 | QueryInclude, 16 | cleanup_path, 17 | expand_globs, 18 | expand_path, 19 | ) 20 | from vectorcode.common import ( 21 | ClientManager, 22 | get_collection, 23 | get_embedding_function, 24 | verify_ef, 25 | ) 26 | from vectorcode.subcommands.query import types as vectorcode_types 27 | from vectorcode.subcommands.query.reranker import ( 28 | RerankerError, 29 | get_reranker, 30 | ) 31 | 32 | logger = logging.getLogger(name=__name__) 33 | 34 | 35 | def convert_query_results( 36 | chroma_result: QueryResult, queries: list[str] 37 | ) -> list[vectorcode_types.QueryResult]: 38 | """Convert chromadb query result to in-house query results""" 39 | assert chroma_result["documents"] is not None 40 | assert chroma_result["distances"] is not None 41 | assert chroma_result["metadatas"] is not None 42 | assert chroma_result["ids"] is not None 43 | 44 | chroma_results_list: list[vectorcode_types.QueryResult] = [] 45 | for q_i in range(len(queries)): 46 | q = queries[q_i] 47 | documents = chroma_result["documents"][q_i] 48 | distances = chroma_result["distances"][q_i] 49 | metadatas = chroma_result["metadatas"][q_i] 50 | ids = chroma_result["ids"][q_i] 51 | for doc, dist, meta, _id in zip(documents, distances, metadatas, ids): 52 | chunk = Chunk(text=doc, id=_id) 53 | if meta.get("start"): 54 | chunk.start = Point(int(meta.get("start", 0)), 0) 55 | if meta.get("end"): 56 | chunk.end = Point(int(meta.get("end", 0)), 0) 57 | if meta.get("path"): 58 | chunk.path = str(meta["path"]) 59 | chroma_results_list.append( 60 | vectorcode_types.QueryResult( 61 | chunk=chunk, 62 | path=str(meta.get("path", "")), 63 | query=(q,), 64 | scores=(-dist,), 65 | ) 66 | ) 67 | return chroma_results_list 68 | 69 | 70 | async def get_query_result_files( 71 | collection: AsyncCollection, configs: Config 72 | ) -> list[str | Chunk]: 73 | query_chunks = [] 74 | assert configs.query, "Query messages cannot be empty." 
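    # Each query message is split into chunks with StringChunker; every chunk is
    # embedded and queried against the collection, and the combined hits are
    # re-ranked by the configured reranker before this function returns.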
75 | chunker = StringChunker(configs) 76 | for q in configs.query: 77 | query_chunks.extend(str(i) for i in chunker.chunk(q)) 78 | 79 | configs.query_exclude = [ 80 | expand_path(i, True) 81 | for i in await expand_globs(configs.query_exclude) 82 | if os.path.isfile(i) 83 | ] 84 | if (await collection.count()) == 0: 85 | logger.error("Empty collection!") 86 | return [] 87 | try: 88 | if len(configs.query_exclude): 89 | logger.info(f"Excluding {len(configs.query_exclude)} files from the query.") 90 | filter: dict[str, Any] = {"path": {"$nin": configs.query_exclude}} 91 | else: 92 | filter = {} 93 | num_query = configs.n_result 94 | if QueryInclude.chunk in configs.include: 95 | if filter: 96 | filter = {"$and": [filter.copy(), {"start": {"$gte": 0}}]} 97 | else: 98 | filter["start"] = {"$gte": 0} 99 | else: 100 | num_query = await collection.count() 101 | if configs.query_multiplier > 0: 102 | num_query = min( 103 | int(configs.n_result * configs.query_multiplier), 104 | await collection.count(), 105 | ) 106 | logger.info(f"Querying {num_query} chunks for reranking.") 107 | query_embeddings = get_embedding_function(configs)(query_chunks) 108 | if isinstance(configs.embedding_dims, int) and configs.embedding_dims > 0: 109 | query_embeddings = [e[: configs.embedding_dims] for e in query_embeddings] 110 | chroma_query_results: QueryResult = await collection.query( 111 | query_embeddings=query_embeddings, 112 | n_results=num_query, 113 | include=[ 114 | IncludeEnum.metadatas, 115 | IncludeEnum.distances, 116 | IncludeEnum.documents, 117 | ], 118 | where=cast(Where, filter) or None, 119 | ) 120 | except IndexError: 121 | # no results found 122 | return [] 123 | 124 | reranker = get_reranker(configs) 125 | converted_results = convert_query_results(chroma_query_results, configs.query) 126 | return await reranker.rerank(converted_results) 127 | 128 | 129 | async def build_query_results( 130 | collection: AsyncCollection, configs: Config 131 | ) -> list[dict[str, str | int]]: 132 | assert configs.project_root 133 | 134 | def make_output_path(path: str, absolute: bool) -> str: 135 | if absolute: 136 | if os.path.isabs(path): 137 | return path 138 | return os.path.abspath(os.path.join(str(configs.project_root), path)) 139 | else: 140 | rel_path = os.path.relpath(path, configs.project_root) 141 | if isinstance(rel_path, bytes): # pragma: nocover 142 | # for some reasons, some python versions report that `os.path.relpath` returns a string. 143 | rel_path = rel_path.decode() 144 | return rel_path 145 | 146 | structured_result = [] 147 | for res in await get_query_result_files(collection, configs): 148 | if isinstance(res, str): 149 | output_path = make_output_path(res, configs.use_absolute_path) 150 | io_path = make_output_path(res, True) 151 | if not os.path.isfile(io_path): 152 | logger.warning(f"{io_path} is no longer a valid file.") 153 | continue 154 | with open(io_path) as fin: 155 | structured_result.append({"path": output_path, "document": fin.read()}) 156 | else: 157 | res = cast(Chunk, res) 158 | assert res.path, f"{res} has no `path` attribute." 
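            # chunk mode: emit the chunk text together with its path, start/end
            # line numbers and chunk id instead of the full document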
159 | structured_result.append( 160 | { 161 | "path": make_output_path(res.path, configs.use_absolute_path) 162 | if res.path is not None 163 | else None, 164 | "chunk": res.text, 165 | "start_line": res.start.row if res.start is not None else None, 166 | "end_line": res.end.row if res.end is not None else None, 167 | "chunk_id": res.id, 168 | } 169 | ) 170 | for result in structured_result: 171 | if result.get("path") is not None: 172 | result["path"] = cleanup_path(result["path"]) 173 | return structured_result 174 | 175 | 176 | async def query(configs: Config) -> int: 177 | if ( 178 | QueryInclude.chunk in configs.include 179 | and QueryInclude.document in configs.include 180 | ): 181 | logger.error( 182 | "Having both chunk and document in the output is not supported!", 183 | ) 184 | return 1 185 | async with ClientManager().get_client(configs) as client: 186 | try: 187 | collection = await get_collection(client, configs, False) 188 | if not verify_ef(collection, configs): 189 | return 1 190 | except (ValueError, InvalidCollectionException) as e: 191 | logger.error( 192 | f"{e.__class__.__name__}: There's no existing collection for {configs.project_root}", 193 | ) 194 | return 1 195 | except InvalidDimensionException as e: 196 | logger.error( 197 | f"{e.__class__.__name__}: The collection was embedded with a different embedding model.", 198 | ) 199 | return 1 200 | except IndexError as e: # pragma: nocover 201 | logger.error( 202 | f"{e.__class__.__name__}: Failed to get the collection. Please check your config." 203 | ) 204 | return 1 205 | 206 | if not configs.pipe: 207 | print("Starting querying...") 208 | 209 | if QueryInclude.chunk in configs.include: 210 | if len((await collection.get(where={"start": {"$gte": 0}}))["ids"]) == 0: 211 | logger.warning( 212 | """ 213 | This collection doesn't contain line range metadata. Falling back to `--include path document`. 214 | Please re-vectorise it to use `--include chunk`.""", 215 | ) 216 | configs.include = [QueryInclude.path, QueryInclude.document] 217 | 218 | try: 219 | structured_result = await build_query_results(collection, configs) 220 | except RerankerError as e: # pragma: nocover 221 | # error logs should be handled where they're raised 222 | logger.error(f"{e.__class__.__name__}") 223 | return 1 224 | 225 | if configs.pipe: 226 | print(json.dumps(structured_result)) 227 | else: 228 | for idx, result in enumerate(structured_result): 229 | for include_item in configs.include: 230 | print(f"{include_item.to_header()}{result.get(include_item.value)}") 231 | if idx != len(structured_result) - 1: 232 | print() 233 | return 0 234 | -------------------------------------------------------------------------------- /lua/vectorcode/cacher/default.lua: -------------------------------------------------------------------------------- 1 | ---@type VectorCode.CacheBackend 2 | local M = {} 3 | 4 | local utils = require("vectorcode.utils") 5 | local vc_config = require("vectorcode.config") 6 | local notify_opts = vc_config.notify_opts 7 | local jobrunner = require("vectorcode.jobrunner.cmd") 8 | 9 | local logger = vc_config.logger 10 | 11 | ---@type table 12 | local CACHE = {} 13 | 14 | ---@param bufnr integer 15 | local function kill_jobs(bufnr) 16 | ---@type VectorCode.Cache? 
17 | local cache = CACHE[bufnr] 18 | if cache ~= nil then 19 | for job_pid, is_running in pairs(cache.jobs) do 20 | if type(is_running) == "number" then 21 | vim.uv.kill(job_pid, 15) 22 | end 23 | end 24 | end 25 | end 26 | 27 | ---@param query_message string|string[] 28 | ---@param buf_nr integer 29 | local function async_runner(query_message, buf_nr) 30 | if CACHE[buf_nr] == nil or not CACHE[buf_nr].enabled then 31 | return 32 | end 33 | local buf_name 34 | vim.schedule(function() 35 | buf_name = vim.api.nvim_buf_get_name(buf_nr) 36 | logger.debug("Started default cacher job on :", buf_name) 37 | end) 38 | ---@type VectorCode.Cache 39 | local cache = CACHE[buf_nr] 40 | local args = { 41 | "query", 42 | "--pipe", 43 | "-n", 44 | tostring(cache.options.n_query), 45 | } 46 | 47 | if type(query_message) == "string" then 48 | query_message = { query_message } 49 | end 50 | vim.list_extend(args, query_message) 51 | 52 | if cache.options.exclude_this then 53 | vim.list_extend(args, { "--exclude", vim.api.nvim_buf_get_name(buf_nr) }) 54 | end 55 | 56 | local project_root = cache.options.project_root 57 | if project_root ~= nil then 58 | assert( 59 | utils.is_directory(project_root), 60 | ("%s is not a valid directory!"):format(project_root) 61 | ) 62 | vim.list_extend(args, { "--project_root", project_root }) 63 | end 64 | 65 | if cache.options.single_job then 66 | kill_jobs(buf_nr) 67 | end 68 | 69 | CACHE[buf_nr].job_count = CACHE[buf_nr].job_count + 1 70 | logger.debug("vectorcode default cacher job args: ", args) 71 | 72 | -- jobrunner is assumed to be defined at the module level, e.g., local jobrunner = require("vectorcode.jobrunner.cmd") 73 | local job_pid 74 | job_pid = jobrunner.run_async( 75 | args, 76 | function(json_result, stderr_error, exit_code, signal) 77 | if not M.buf_is_registered(buf_nr) then 78 | return 79 | end 80 | logger.debug("vectorcode ", buf_name, " default cacher results: ", json_result) 81 | CACHE[buf_nr].job_count = CACHE[buf_nr].job_count - 1 82 | assert(job_pid ~= nil, "Failed to fetch the job pid.") 83 | CACHE[buf_nr].jobs[job_pid] = nil 84 | 85 | if exit_code ~= 0 then 86 | vim.schedule(function() 87 | if CACHE[buf_nr].options.notify then 88 | if signal == 15 then 89 | vim.notify("Retrieval aborted.", vim.log.levels.INFO, notify_opts) 90 | else 91 | vim.notify( 92 | "Retrieval failed:\\n" .. table.concat(stderr_error, "\n"), 93 | vim.log.levels.WARN, 94 | notify_opts 95 | ) 96 | end 97 | end 98 | end) 99 | return 100 | end 101 | cache = CACHE[buf_nr] 102 | cache.retrieval = json_result or {} 103 | vim.schedule(function() 104 | if cache.options.notify then 105 | vim.notify( 106 | ("Caching for buffer %d has completed."):format(buf_nr), 107 | vim.log.levels.INFO, 108 | notify_opts 109 | ) 110 | end 111 | end) 112 | end, 113 | buf_nr 114 | ) 115 | 116 | ---@type VectorCode.Cache 117 | cache = CACHE[buf_nr] 118 | if job_pid then 119 | cache.last_run = vim.uv.clock_gettime("realtime").sec 120 | cache.jobs[job_pid] = vim.uv.clock_gettime("realtime").sec 121 | end 122 | vim.schedule(function() 123 | if cache.options.notify then 124 | vim.notify( 125 | ("Caching for buffer %d has started."):format(buf_nr), 126 | vim.log.levels.INFO, 127 | notify_opts 128 | ) 129 | end 130 | end) 131 | end 132 | 133 | M.register_buffer = vc_config.check_cli_wrap( 134 | ---This function registers a buffer to be cached by VectorCode. The 135 | ---registered buffer can be acquired by the `query_from_cache` API. 
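  ---
  ---A minimal usage sketch (assuming the project has already been vectorised;
  ---the buffer number and option values below are illustrative, not defaults):
  ---```lua
  ---local cacher = require("vectorcode.cacher.default")
  ---cacher.register_buffer(0, { n_query = 5, run_on_register = true })
  ----- later, read whatever the background jobs have cached:
  ---local results = cacher.query_from_cache(0, { notify = false })
  ---```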
136 | ---The retrieval of the files occurs in the background, so this 137 | ---function will not block the main thread. 138 | --- 139 | ---NOTE: this function uses an autocommand to track the changes to the buffer and trigger retrieval. 140 | ---@param bufnr integer? Defaults to the current buffer. 141 | ---@param opts VectorCode.RegisterOpts? Async options. 142 | function(bufnr, opts) 143 | if bufnr == 0 or bufnr == nil then 144 | bufnr = vim.api.nvim_get_current_buf() 145 | end 146 | logger.info( 147 | ("Registering buffer %s %s for default cacher."):format( 148 | bufnr, 149 | vim.api.nvim_buf_get_name(bufnr) 150 | ) 151 | ) 152 | if M.buf_is_registered(bufnr) then 153 | opts = vim.tbl_deep_extend("force", CACHE[bufnr].options, opts or {}) 154 | end 155 | opts = 156 | vim.tbl_deep_extend("force", vc_config.get_user_config().async_opts, opts or {}) 157 | 158 | if M.buf_is_registered(bufnr) then 159 | -- update the options and/or query_cb 160 | CACHE[bufnr].options = 161 | vim.tbl_deep_extend("force", CACHE[bufnr].options, opts or {}) 162 | logger.debug( 163 | ("Updated `default` cacher opts for buffer %s:\n%s"):format( 164 | bufnr, 165 | vim.inspect(opts) 166 | ) 167 | ) 168 | else 169 | CACHE[bufnr] = { 170 | enabled = true, 171 | retrieval = nil, 172 | options = opts, 173 | jobs = {}, 174 | job_count = 0, 175 | } 176 | end 177 | if opts.run_on_register then 178 | async_runner(opts.query_cb(bufnr), bufnr) 179 | end 180 | local group = vim.api.nvim_create_augroup( 181 | ("VectorCodeCacheGroup%d"):format(bufnr), 182 | { clear = true } 183 | ) 184 | vim.api.nvim_create_autocmd(opts.events, { 185 | group = group, 186 | callback = function() 187 | assert(CACHE[bufnr] ~= nil, "buffer vectorcode cache not registered") 188 | local cache = CACHE[bufnr] 189 | if 190 | cache.last_run == nil 191 | or (vim.uv.clock_gettime("realtime").sec - cache.last_run) > opts.debounce 192 | then 193 | local cb = cache.options.query_cb 194 | assert(type(cb) == "function", "`cb` should be a function.") 195 | async_runner(cb(bufnr), bufnr) 196 | end 197 | end, 198 | buffer = bufnr, 199 | desc = "Run query on certain autocmd", 200 | }) 201 | vim.api.nvim_create_autocmd("BufWinLeave", { 202 | buffer = bufnr, 203 | desc = "Kill all running VectorCode async jobs.", 204 | group = group, 205 | callback = function() 206 | kill_jobs(bufnr) 207 | end, 208 | }) 209 | end 210 | ) 211 | 212 | M.deregister_buffer = vc_config.check_cli_wrap( 213 | ---This function deregisters a buffer from VectorCode. This will kill all 214 | ---running jobs, delete cached results, and deregister the autocommands 215 | ---associated with the buffer. If the caching has not been registered, an 216 | ---error notification will be fired. 217 | ---@param bufnr integer?
218 | ---@param opts {notify:boolean} 219 | function(bufnr, opts) 220 | opts = opts or { notify = false } 221 | if bufnr == nil or bufnr == 0 then 222 | bufnr = vim.api.nvim_get_current_buf() 223 | end 224 | logger.info( 225 | ("Deregistering buffer %s %s"):format(bufnr, vim.api.nvim_buf_get_name(bufnr)) 226 | ) 227 | if M.buf_is_registered(bufnr) then 228 | kill_jobs(bufnr) 229 | vim.api.nvim_del_augroup_by_name(("VectorCodeCacheGroup%d"):format(bufnr)) 230 | CACHE[bufnr] = nil 231 | if opts.notify then 232 | vim.notify( 233 | ("VectorCode Caching has been unregistered for buffer %d."):format(bufnr), 234 | vim.log.levels.INFO, 235 | notify_opts 236 | ) 237 | end 238 | else 239 | vim.notify( 240 | ("VectorCode Caching hasn't been registered for buffer %d."):format(bufnr), 241 | vim.log.levels.ERROR, 242 | notify_opts 243 | ) 244 | end 245 | end 246 | ) 247 | 248 | ---@param bufnr integer? 249 | ---@return boolean 250 | M.buf_is_registered = function(bufnr) 251 | if bufnr == 0 or bufnr == nil then 252 | bufnr = vim.api.nvim_get_current_buf() 253 | end 254 | return type(CACHE[bufnr]) == "table" and not vim.tbl_isempty(CACHE[bufnr]) 255 | end 256 | 257 | M.query_from_cache = vc_config.check_cli_wrap( 258 | ---This function queries VectorCode from cache. Returns an array of results. Each item 259 | ---of the array is in the format of `{path="path/to/your/code.lua", document="document content"}`. 260 | ---@param bufnr integer? 261 | ---@param opts {notify: boolean}? 262 | ---@return VectorCode.QueryResult[] 263 | function(bufnr, opts) 264 | local result = {} 265 | if bufnr == 0 or bufnr == nil then 266 | bufnr = vim.api.nvim_get_current_buf() 267 | end 268 | if M.buf_is_registered(bufnr) then 269 | opts = vim.tbl_deep_extend( 270 | "force", 271 | { notify = CACHE[bufnr].options.notify }, 272 | opts or {} 273 | ) 274 | result = CACHE[bufnr].retrieval or {} 275 | local message = ("Retrieved %d documents from cache."):format(#result) 276 | logger.trace(("vectorcode cmd cacher for buf %s: %s"):format(bufnr, message)) 277 | if opts.notify then 278 | vim.schedule(function() 279 | vim.notify(message, vim.log.levels.INFO, notify_opts) 280 | end) 281 | end 282 | end 283 | return result 284 | end 285 | ) 286 | 287 | ---@alias ComponentCallback fun(result:VectorCode.QueryResult):string 288 | 289 | ---Compile the retrieval results into a string. 290 | ---@param bufnr integer 291 | ---@param component_cb ComponentCallback? The component callback that formats a retrieval result. 292 | ---@return {content:string, count:integer} 293 | function M.make_prompt_component(bufnr, component_cb) 294 | if bufnr == 0 or bufnr == nil then 295 | bufnr = vim.api.nvim_get_current_buf() 296 | end 297 | if not M.buf_is_registered(bufnr) then 298 | return { content = "", count = 0 } 299 | end 300 | if component_cb == nil then 301 | ---@type fun(result:VectorCode.QueryResult):string 302 | component_cb = function(result) 303 | return "<|file_sep|>" .. result.path .. "\n" .. result.document 304 | end 305 | end 306 | local final_component = "" 307 | local retrieval = M.query_from_cache(bufnr) 308 | for _, file in pairs(retrieval) do 309 | final_component = final_component .. component_cb(file) 310 | end 311 | return { content = final_component, count = #retrieval } 312 | end 313 | 314 | ---Checks if VectorCode has been configured properly for your project. 315 | ---See the CLI manual for details. 316 | ---@param check_item string? 317 | ---@param on_success fun(out: vim.SystemCompleted)? 
318 | ---@param on_failure fun(out: vim.SystemCompleted?)? 319 | function M.async_check(check_item, on_success, on_failure) 320 | vim.deprecate( 321 | "vectorcode.cacher.default.async_check", 322 | 'require("vectorcode.cacher").utils.async_check', 323 | "0.7.0", 324 | "VectorCode", 325 | true 326 | ) 327 | require("vectorcode.cacher").utils.async_check(check_item, on_success, on_failure) 328 | end 329 | 330 | ---@param bufnr integer? 331 | ---@return integer 332 | function M.buf_job_count(bufnr) 333 | if bufnr == nil or bufnr == 0 then 334 | bufnr = vim.api.nvim_get_current_buf() 335 | end 336 | if M.buf_is_registered(bufnr) then 337 | return CACHE[bufnr].job_count 338 | else 339 | return 0 340 | end 341 | end 342 | 343 | ---@param bufnr integer? 344 | ---@return boolean 345 | function M.buf_is_enabled(bufnr) 346 | if bufnr == nil or bufnr == 0 then 347 | bufnr = vim.api.nvim_get_current_buf() 348 | end 349 | return CACHE[bufnr] ~= nil and CACHE[bufnr].enabled 350 | end 351 | 352 | return M 353 | --------------------------------------------------------------------------------