├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .ruff.toml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── docs
    ├── .gitignore
    ├── astro.config.mjs
    ├── favicon.svg
    ├── package-lock.json
    ├── package.json
    ├── public
    │   └── kit.png
    ├── src
    │   ├── components
    │   │   ├── Card.astro
    │   │   ├── CardGroup.astro
    │   │   ├── CasedCard.astro
    │   │   ├── Changelog.astro
    │   │   ├── Frame.astro
    │   │   ├── PageIntro.astro
    │   │   └── Update.astro
    │   ├── content
    │   │   ├── config.ts
    │   │   └── docs
    │   │   │   ├── README.mdx
    │   │   │   ├── api
    │   │   │       ├── code_searcher.mdx
    │   │   │       ├── dependency-analyzer.mdx
    │   │   │       ├── docstring-indexer.mdx
    │   │   │       ├── repository.mdx
    │   │   │       ├── rest-api.mdx
    │   │   │       ├── summarizer.mdx
    │   │   │       └── summary-searcher.mdx
    │   │   │   ├── changelog.mdx
    │   │   │   ├── core-concepts
    │   │   │       ├── code-summarization.mdx
    │   │   │       ├── context-assembly.mdx
    │   │   │       ├── dependency-analysis.mdx
    │   │   │       ├── docstring-indexing.mdx
    │   │   │       ├── llm-context-best-practices.mdx
    │   │   │       ├── repository-api.mdx
    │   │   │       ├── repository-versioning.mdx
    │   │   │       ├── search-approaches.mdx
    │   │   │       ├── semantic-search.mdx
    │   │   │       └── tool-calling-with-kit.mdx
    │   │   │   ├── development
    │   │   │       ├── roadmap.mdx
    │   │   │       └── running-tests.mdx
    │   │   │   ├── extending
    │   │   │       └── adding-languages.mdx
    │   │   │   ├── index.mdx
    │   │   │   ├── introduction
    │   │   │       ├── cli.mdx
    │   │   │       ├── overview.mdx
    │   │   │       ├── quickstart.mdx
    │   │   │       └── usage-guide.mdx
    │   │   │   ├── mcp
    │   │   │       └── using-kit-with-mcp.md
    │   │   │   ├── pr-reviewer.mdx
    │   │   │   ├── recipes.mdx
    │   │   │   └── tutorials
    │   │   │       ├── ai_pr_reviewer.mdx
    │   │   │       ├── codebase-qa-bot.mdx
    │   │   │       ├── codebase_summarizer.mdx
    │   │   │       ├── dependency_graph_visualizer.mdx
    │   │   │       ├── docstring_search.mdx
    │   │   │       ├── dump_repo_map.mdx
    │   │   │       ├── exploring-kit-interactively.mdx
    │   │   │       ├── integrating_supersonic.mdx
    │   │   │       ├── ollama.mdx
    │   │   │       └── recipes.mdx
    │   ├── env.d.ts
    │   └── styles
    │   │   ├── fonts
    │   │       ├── IBMPlexSansV.ttf
    │   │       └── iAWriterQuattroV.ttf
    │   │   └── theme.css
    └── tsconfig.json
├── package-lock.json
├── pyproject.toml
├── scripts
    ├── benchmark.py
    ├── format.sh
    ├── index.py
    ├── release.sh
    ├── test.sh
    ├── test_ollama_local.py
    └── typecheck.sh
├── src
    └── kit
    │   ├── __init__.py
    │   ├── api
    │       ├── __init__.py
    │       ├── app.py
    │       └── registry.py
    │   ├── cli.py
    │   ├── code_searcher.py
    │   ├── context_extractor.py
    │   ├── dependency_analyzer
    │       ├── __init__.py
    │       ├── dependency_analyzer.py
    │       ├── python_dependency_analyzer.py
    │       └── terraform_dependency_analyzer.py
    │   ├── docstring_indexer.py
    │   ├── llm_context.py
    │   ├── mcp
    │       ├── __init__.py
    │       ├── __main__.py
    │       ├── main.py
    │       └── server.py
    │   ├── pr_review
    │       ├── README.md
    │       ├── ROADMAP.md
    │       ├── __init__.py
    │       ├── __main__.py
    │       ├── agentic_reviewer.py
    │       ├── cache.py
    │       ├── config.py
    │       ├── cost_tracker.py
    │       ├── debug.py
    │       ├── diff_parser.py
    │       ├── example_reviews
    │       │   ├── README.md
    │       │   ├── biopython_204_documentation_fix.md
    │       │   ├── fastapi_11935_standard_dependencies.md
    │       │   ├── model_comparison_fastapi_11935.md
    │       │   └── react_dev_6986_branding_menu.md
    │       ├── file_prioritizer.py
    │       ├── line_ref_fixer.py
    │       ├── matrix_tester.py
    │       ├── reviewer.py
    │       ├── test_accuracy.py
    │       └── validator.py
    │   ├── queries
    │       ├── c
    │       │   └── tags.scm
    │       ├── go
    │       │   └── tags.scm
    │       ├── hcl
    │       │   └── tags.scm
    │       ├── java
    │       │   └── tags.scm
    │       ├── javascript
    │       │   └── tags.scm
    │       ├── python
    │       │   └── tags.scm
    │       ├── ruby
    │       │   └── tags.scm
    │       ├── rust
    │       │   └── tags.scm
    │       └── typescript
    │       │   └── tags.scm
    │   ├── repo_mapper.py
    │   ├── repository.py
    │   ├── summaries.py
    │   ├── tool_schemas.py
    │   ├── tree_sitter_symbol_extractor.py
    │   └── vector_searcher.py
├── test.sh
├── tests
    ├── README.md
    ├── conftest.py
    ├── evals
    │   └── pr_test_set.txt
    ├── examples
    │   ├── python_dependency_analysis
    │   │   ├── README.md
    │   │   └── analyze_python_deps.py
    │   └── terraform_dependency_analysis
    │   │   ├── README.md
    │   │   ├── analyze_terraform_deps.py
    │   │   ├── compute.tf
    │   │   ├── database.tf
    │   │   ├── dns.tf
    │   │   ├── main.tf
    │   │   ├── network.tf
    │   │   ├── outputs.tf
    │   │   ├── security.tf
    │   │   ├── storage.tf
    │   │   └── variables.tf
    ├── fixtures
    │   └── realistic_repo
    │   │   ├── __init__.py
    │   │   ├── app.py
    │   │   ├── models
    │   │       ├── __init__.py
    │   │       └── user.py
    │   │   ├── services
    │   │       ├── __init__.py
    │   │       ├── auth.py
    │   │       └── db.py
    │   │   └── utils.py
    ├── golden_go.go
    ├── golden_hcl.tf
    ├── golden_python.py
    ├── golden_python_complex.py
    ├── golden_rust.rs
    ├── golden_typescript.ts
    ├── golden_typescript_complex.ts
    ├── integration
    │   └── test_api_live.py
    ├── mcp
    │   ├── test_call_tool_content.py
    │   └── test_server.py
    ├── sample_code
    │   ├── c_sample.c
    │   ├── go_sample.go
    │   ├── hcl_sample.hcl
    │   ├── java_sample.java
    │   ├── javascript_sample.js
    │   ├── python_sample.py
    │   ├── ruby_sample.rb
    │   ├── rust_sample.rs
    │   ├── tf_sample.tf
    │   ├── tsx_sample.tsx
    │   └── typescript_sample.ts
    ├── test_api_ref.py
    ├── test_cli.py
    ├── test_cli_integration.py
    ├── test_cli_ref.py
    ├── test_code_searcher.py
    ├── test_context_assembler.py
    ├── test_context_assembler_limits.py
    ├── test_context_extractor.py
    ├── test_cross_file_impact.py
    ├── test_diff_integration.py
    ├── test_diff_parser.py
    ├── test_docstring_incremental.py
    ├── test_docstring_indexer.py
    ├── test_golden_symbols.py
    ├── test_hcl_symbols.py
    ├── test_java_symbols.py
    ├── test_line_accuracy_validation.py
    ├── test_line_ref_fix.py
    ├── test_llm_line_accuracy.py
    ├── test_mcp_ref.py
    ├── test_ollama_integration.py
    ├── test_ollama_integration_real.py
    ├── test_pr_review.py
    ├── test_python_dependency_analyzer.py
    ├── test_real_pr_accuracy.py
    ├── test_registry_deterministic.py
    ├── test_repo.py
    ├── test_repo_integration.py
    ├── test_repo_mapper.py
    ├── test_resource_loading.py
    ├── test_ruby_c_symbols.py
    ├── test_sample_code_extraction.py
    ├── test_summaries.py
    ├── test_summarizer.py
    ├── test_symbol_extraction_multilang.py
    ├── test_symbol_extractor_additional.py
    ├── test_terraform_dependency_analyzer.py
    ├── test_tool_schemas.py
    ├── test_tree_sitter_languages.py
    ├── test_tree_sitter_symbol_extractor.py
    ├── test_typescript_symbol_extraction.py
    └── test_vector_searcher.py
└── uv.lock


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v5
16 |         with:
17 |           python-version: '3.13'
18 |       - name: Install uv
19 |         run: pip install uv
20 |       - name: Set up virtualenv and install deps
21 |         run: |
22 |           # Create a new virtual environment
23 |           uv venv .venv
24 |           # Activate the virtual environment
25 |           source .venv/bin/activate
26 |           # Install runtime and dev dependencies from pyproject.toml
27 |           uv pip install -e .[dev,all]
28 |       - name: Lint and Type Check
29 |         run: |
30 |           # Activate the virtual environment
31 |           source .venv/bin/activate
32 |           # Run mypy type checking
33 |           mypy src/kit
34 |           # Run ruff linter
35 |           ruff check .
36 |           # Run ruff formatter check (non-modifying)
37 |           ruff format --check .
38 |       - name: Run Tests
39 |         run: |
40 |           # Activate the virtual environment
41 |           source .venv/bin/activate
42 |           # Run tests via scripts/test.sh
43 |           bash scripts/test.sh -v
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Distribution / packaging
 7 | .Python
 8 | build/
 9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | .venv/
23 | 
24 | # Installer logs
25 | pip-log.txt
26 | pip-delete-this-directory.txt
27 | 
28 | # Unit test / coverage reports
29 | htmlcov/
30 | .tox/
31 | .nox/
32 | .coverage
33 | .coverage.*
34 | .cache
35 | nosetests.xml
36 | coverage.xml
37 | *.cover
38 | .hypothesis/
39 | .pytest_cache/
40 | test_repo*
41 | 
42 | # Jupyter Notebook
43 | .ipynb_checkpoints
44 | 
45 | # pyenv
46 | .python-version
47 | 
48 | # mypy
49 | .mypy_cache/
50 | .dmypy.json
51 | 
52 | # VSCode
53 | .vscode/
54 | 
55 | # macOS
56 | .DS_Store
57 | .vercel
58 | 
59 | # Kit specific cache and data directory
60 | .kit/
61 | # .kit/docstring_db/ # Covered by .kit/
62 | 
63 | # Test results and matrix testing artifacts
64 | test-results/
65 | *.json
66 | 


--------------------------------------------------------------------------------
/.ruff.toml:
--------------------------------------------------------------------------------
 1 | # Base configuration from pyproject.toml
 2 | line-length = 120
 3 | target-version = "py310"
 4 | 
 5 | [lint]
 6 | select = ["E", "F", "W", "I", "RUF"]
 7 | # Ignore unicode dash issues - these are stylistic
 8 | ignore = ["RUF002", "RUF003", "E741"]
 9 | 
10 | # Ignore specific issues in certain files
11 | [lint.per-file-ignores]
12 | # Example files don't need strict import ordering
13 | "tests/examples/**/*.py" = ["E402"]
14 | "src/kit/mcp/__main__.py" = ["E402"]
15 | 
16 | # Test files can have long lines in test data
17 | "tests/**/*.py" = ["E501"]
18 | 
19 | # Allow mutable default attributes in these files
20 | "src/kit/tree_sitter_symbol_extractor.py" = ["RUF012", "E501"]
21 | "src/kit/summaries.py" = ["RUF012", "E501"]
22 | 
23 | # MCP files have complex formatting needs and may exceed line limits
24 | "src/kit/mcp/**/*.py" = ["E501"]
25 | 
26 | # Library files with long lines we'll fix later
27 | "src/kit/**/*.py" = ["E501"]
28 | 
29 | # Files to skip import sorting completely
30 | "**/__init__.py" = ["F401"]
31 | 
32 | [lint.isort]
33 | known-first-party = ["kit", "cased_kit"] 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## [0.5.0] - 2025-05-31
 4 | 
 5 | ### Changed
 6 | - **BREAKING**: Made `sentence-transformers` and `chromadb` optional dependencies
 7 |   - Basic installation no longer includes ML/PyTorch dependencies (~50MB vs ~2GB)
 8 |   - Semantic search features now require `pip install cased-kit[ml]`
 9 |   - Full installation available via `pip install cased-kit[all]`
10 | 
11 | ### Added
12 | - New installation options for different use cases
13 | - Comprehensive PR reviewer documentation at `/pr-reviewer`
14 | - CI/CD integration examples for GitHub Actions
15 | 
16 | ### Fixed
17 | - Corrected PR review pricing documentation (actual: $0.01-0.05, not $0.10-0.30)
18 | - Removed references to deprecated Simple mode
19 | 
20 | ### Improved
21 | - PR reviewer now shows model information in logs
22 | - Added real-world PR review examples with actual costs
23 | - Enhanced documentation organization 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Cased
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
 1 | # build output
 2 | dist/
 3 | # generated types
 4 | .astro/
 5 | 
 6 | # dependencies
 7 | node_modules/
 8 | 
 9 | # logs
10 | npm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | pnpm-debug.log*
14 | 
15 | 
16 | # environment variables
17 | .env
18 | .env.production
19 | 
20 | # macOS-specific files
21 | .DS_Store
22 | .vercel
23 | 


--------------------------------------------------------------------------------
/docs/astro.config.mjs:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "astro/config";
 2 | import starlight from "@astrojs/starlight";
 3 | import starlightLLMsTXT from "starlight-llms-txt";
 4 | 
 5 | // https://astro.build/config
 6 | export default defineConfig({
 7 |   site: "https://kit.cased.com",
 8 |   integrations: [
 9 |     starlight({
10 |       title: "kit ",
11 |       plugins: [starlightLLMsTXT()],
12 |       social: [
13 |         {
14 |           icon: "github",
15 |           href: "https://github.com/cased/kit",
16 |           label: "GitHub",
17 |         },
18 |       ],
19 |       customCss: [
20 |         // Path to your custom CSS file, relative to the project root
21 |         "./src/styles/theme.css",
22 |       ],
23 |       markdown: { headingLinks: false },
24 |       sidebar: [
25 |         {
26 |           label: " Introduction",
27 |           items: [
28 |             "introduction/overview",
29 |             "introduction/quickstart",
30 |             "introduction/usage-guide",
31 |             "introduction/cli",
32 |             "pr-reviewer",
33 |             "changelog"
34 |           ],
35 |         },
36 |         {
37 |           label: " Core Concepts",
38 |           items: [
39 |             // Manually specify order, starting with repository-api
40 |             "core-concepts/repository-api",
41 |             "core-concepts/search-approaches",
42 |             "core-concepts/code-summarization",
43 |             "core-concepts/docstring-indexing",
44 |             "core-concepts/tool-calling-with-kit",
45 |             "core-concepts/repository-versioning",
46 |             "core-concepts/semantic-search",
47 |             "core-concepts/dependency-analysis",
48 |             "core-concepts/llm-context-best-practices",
49 |             "core-concepts/context-assembly",
50 |           ],
51 |         },
52 |         {
53 |           label: " Tutorials",
54 |           items: [
55 |             "tutorials/ai_pr_reviewer",
56 |             "tutorials/codebase-qa-bot",
57 |             "tutorials/codebase_summarizer",
58 |             "tutorials/dependency_graph_visualizer",
59 |             "tutorials/docstring_search",
60 |             "tutorials/dump_repo_map",
61 |             "tutorials/integrating_supersonic",
62 |             "tutorials/recipes",
63 |             "tutorials/ollama",
64 |           ],
65 |         },
66 |         {
67 |           label: " API Reference",
68 |           autogenerate: { directory: "api" },
69 |         },
70 |         {
71 |           label: " MCP",
72 |           items: [
73 |             "mcp/using-kit-with-mcp",
74 |           ],
75 |         },
76 |         {
77 |           label: " Development",
78 |           autogenerate: { directory: "development" },
79 |         },
80 |         {
81 |           label: " Extending Kit",
82 |           autogenerate: { directory: "extending" },
83 |         },
84 |       ],
85 |     }),
86 |   ],
87 | });
88 | 


--------------------------------------------------------------------------------
/docs/favicon.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128"><path fill-rule="evenodd" d="M81 36 64 0 47 36l-1 2-9-10a6 6 0 0 0-9 9l10 10h-2L0 64l36 17h2L28 91a6 6 0 1 0 9 9l9-10 1 2 17 36 17-36v-2l9 10a6 6 0 1 0 9-9l-9-9 2-1 36-17-36-17-2-1 9-9a6 6 0 1 0-9-9l-9 10v-2Zm-17 2-2 5c-4 8-11 15-19 19l-5 2 5 2c8 4 15 11 19 19l2 5 2-5c4-8 11-15 19-19l5-2-5-2c-8-4-15-11-19-19l-2-5Z" clip-rule="evenodd"/><path d="M118 19a6 6 0 0 0-9-9l-3 3a6 6 0 1 0 9 9l3-3Zm-96 4c-2 2-6 2-9 0l-3-3a6 6 0 1 1 9-9l3 3c3 2 3 6 0 9Zm0 82c-2-2-6-2-9 0l-3 3a6 6 0 1 0 9 9l3-3c3-2 3-6 0-9Zm96 4a6 6 0 0 1-9 9l-3-3a6 6 0 1 1 9-9l3 3Z"/><style>path{fill:#000}@media (prefers-color-scheme:dark){path{fill:#fff}}</style></svg>


--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "kit-docs",
 3 |   "type": "module",
 4 |   "version": "0.0.1",
 5 |   "scripts": {
 6 |     "dev": "astro dev",
 7 |     "start": "astro dev",
 8 |     "build": "astro build",
 9 |     "preview": "astro preview",
10 |     "astro": "astro"
11 |   },
12 |   "dependencies": {
13 |     "@astrojs/starlight": "^0.34.2",
14 |     "starlight-llms-txt": "^0.5.1",
15 |     "astro": "^5.7.10",
16 |     "astro-icon": "^1.1.5",
17 |     "sharp": "^0.33.4"
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/docs/public/kit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cased/kit/7b2d248e06f9105dd51ef7968c0573041c19a80b/docs/public/kit.png


--------------------------------------------------------------------------------
/docs/src/components/Card.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | export interface Props {
 3 |   title: string;
 4 |   href?: string;
 5 |   icon?: string;
 6 | }
 7 | const { title, href, icon } = Astro.props;
 8 | const Tag = href ? 'a' : 'div';
 9 | ---
10 | <Tag href={href} class="card-link">
11 |   <div class="card">
12 |     {icon && <div class="card-icon">{icon}</div>}
13 |     <div class="card-content-wrapper">
14 |       <div class="card-title">{title}</div>
15 |       <div class="card-body"><slot /></div>
16 |     </div>
17 |   </div>
18 | </Tag>
19 | 
20 | <style>
21 | .card-link {
22 |   text-decoration: none;
23 |   color: inherit;
24 |   display: flex;
25 |   height: 100%;
26 | }
27 | .card {
28 |   background: var(--sl-color-bg-secondary, #fff);
29 |   border: 1px solid var(--sl-color-hairline-light, #e0e0e0);
30 |   border-radius: 12px;
31 |   padding: 1.25rem;
32 |   transition: box-shadow 0.2s, transform 0.2s;
33 |   width: 100%;
34 |   display: flex;
35 |   flex-direction: column;
36 |   justify-content: space-between;
37 | }
38 | .card:hover {
39 |   box-shadow: 0 3px 16px rgba(0,0,0,0.08);
40 |   transform: translateY(-2px);
41 | }
42 | .card-title {
43 |   font-size: 1.1rem;
44 |   font-weight: 600;
45 |   margin-bottom: 0.5rem;
46 | }
47 | .card-body {
48 |   font-size: 0.95rem;
49 |   line-height: 1.45;
50 | }
51 | .card-content-wrapper {
52 |   display: flex;
53 |   flex-direction: column;
54 |   flex-grow: 1;
55 | }
56 | .card-icon {
57 |   font-size: 1.75rem;
58 |   margin-bottom: 0.5rem;
59 | }
60 | </style>
61 | 


--------------------------------------------------------------------------------
/docs/src/components/CardGroup.astro:
--------------------------------------------------------------------------------
1 | ---
2 | export interface Props { cols?: number }
3 | const { cols = 2 } = Astro.props;
4 | const grid = `repeat(${cols}, minmax(0, 1fr))`;
5 | ---
6 | <div class="group" style={`display:grid;grid-template-columns:${grid};gap:1.25rem;`}>
7 |   <slot />
8 | </div>
9 | 


--------------------------------------------------------------------------------
/docs/src/components/CasedCard.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | interface Props {
 3 |   title: string;
 4 |   href?: string;
 5 | }
 6 | 
 7 | const { title, href } = Astro.props;
 8 | const Tag = href ? "a" : "div";
 9 | ---
10 | 
11 | <article class:list={["card", { "card-link": href }]}>
12 |   <Tag
13 |     class="card-content"
14 |     href={href}
15 |   >
16 |     {
17 |       Astro.slots.has("icon") && (
18 |         <span class="icon">
19 |           <slot name="icon" />
20 |         </span>
21 |       )
22 |     }
23 |     <p class="title">
24 |       <span>{title}</span>
25 |     </p>
26 |     <div class="body">
27 |       <slot />
28 |     </div>
29 |   </Tag>
30 | </article>
31 | 
32 | <style>
33 |   .card {
34 |     border: 1px solid var(--sl-color-gray-5);
35 |     border-radius: 0.5rem;
36 |     padding: 0;
37 |     background-color: var(--sl-color-gray-8);
38 |     color: var(--sl-color-white);
39 |   }
40 | 
41 |   .card-content {
42 |     display: block;
43 |     padding: 1.25rem;
44 |     text-decoration: none;
45 |   }
46 | 
47 |   .card-link {
48 |     transition:
49 |       border-color 0.2s ease-in-out,
50 |       background-color 0.2s ease-in-out;
51 |   }
52 | 
53 |   .card-link:hover {
54 |     border-color: var(--sl-color-gray-3);
55 |     background-color: var(--sl-color-gray-7);
56 |   }
57 | 
58 |   .title {
59 |     display: flex;
60 |     gap: 0.5rem;
61 |     align-items: flex-start;
62 |     font-size: var(--sl-text-lg);
63 |     font-weight: 600;
64 |     font-family: "IBM Plex Sans";
65 |     line-height: var(--sl-line-height-headings);
66 |     color: var(--sl-color-white);
67 |     margin: 0;
68 |   }
69 | 
70 |   .body {
71 |     margin-top: 0.5rem;
72 |     font-size: var(--sl-text-sm);
73 |     color: var(--sl-color-gray-2);
74 |   }
75 | 
76 |   .icon {
77 |     display: block;
78 |     color: var(--sl-color-accent);
79 |     margin: 0 0 1rem;
80 |   }
81 | 
82 |   .icon :global(svg) {
83 |     width: 1.5rem;
84 |     height: 1.5rem;
85 |   }
86 | </style>
87 | 


--------------------------------------------------------------------------------
/docs/src/components/Changelog.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | import { getCollection } from "astro:content";
 3 | import type { CollectionEntry } from "astro:content";
 4 | 
 5 | const entries = await getCollection("changelog");
 6 | const sortedEntries = entries.sort(
 7 |   (a, b) => b.data.date.getTime() - a.data.date.getTime()
 8 | );
 9 | 
10 | // Render each entry
11 | const renderedEntries = await Promise.all(
12 |   sortedEntries.map(async (entry) => {
13 |     const { Content } = await entry.render();
14 |     return { entry, Content };
15 |   })
16 | );
17 | ---
18 | 
19 | <div class="changelog">
20 |   {entries.length === 0 && <p>No changelog entries found.</p>}
21 |   {
22 |     renderedEntries.map(({ entry, Content }) => (
23 |       <article class="changelog-entry">
24 |         <header>
25 |           <time datetime={entry.data.date.toISOString()}>
26 |             {entry.data.label}
27 |           </time>
28 |           <h3>{entry.data.title}</h3>
29 |         </header>
30 |         <div class="content">
31 |           <Content />
32 |         </div>
33 |       </article>
34 |     ))
35 |   }
36 | </div>
37 | 
38 | <style>
39 |   .changelog {
40 |     display: flex;
41 |     flex-direction: column;
42 |     gap: 3rem;
43 |   }
44 | 
45 |   .changelog-entry {
46 |     position: relative;
47 | 
48 |     img {
49 |       border-radius: var(--sl-rounded-lg);
50 |       border: 1px solid var(--sl-color-gray-5);
51 |     }
52 |   }
53 |   
54 |   header {
55 |     margin-bottom: 1rem;
56 | 
57 |     h3 {
58 |       margin-top: 0 !important;
59 |       font-size: var(--sl-text-2xl);
60 |       color: var(--sl-color-white);
61 |     }
62 |   }
63 | 
64 |   time {
65 |     color: var(--sl-color-gray-3);
66 |     font-size: var(--sl-text-sm);
67 |     white-space: nowrap;
68 |   }
69 | 
70 |   .description {
71 |     margin: 0.5rem 0 1.5rem;
72 |     color: var(--sl-color-gray-2);
73 |     font-size: var(--sl-text-lg);
74 |   }
75 | 
76 |   .content {
77 |     color: var(--sl-color-gray-2);
78 |   }
79 | 
80 |   .content :global(img) {
81 |     border-radius: var(--sl-rounded-lg);
82 |     margin: 1.5rem 0;
83 |   }
84 | </style>
85 | 


--------------------------------------------------------------------------------
/docs/src/components/Frame.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | 
 3 | ---
 4 | 
 5 | <div class="frame">
 6 |   <slot />
 7 | </div>
 8 | 
 9 | <style>
10 |   .frame {
11 |     border-radius: 8px;
12 |     border: 1px solid var(--sl-color-gray-5);
13 |     padding: 1rem;
14 |     margin: 1.5rem 0;
15 |     overflow: hidden;
16 |   }
17 | 
18 |   .frame :global(img) {
19 |     display: block;
20 |     width: 100%;
21 |     border-radius: 4px;
22 |     margin: 0;
23 |   }
24 | </style>
25 | 


--------------------------------------------------------------------------------
/docs/src/components/PageIntro.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | export interface Props {
 3 |   class?: string;
 4 | }
 5 | 
 6 | const { class: className } = Astro.props;
 7 | ---
 8 | 
 9 | <div class:list={["page-intro", className]}>
10 |   <slot />
11 | </div>
12 | 
13 | <style is:global>
14 |   /* Global style to override Starlight's styles */
15 |   .sl-markdown-content .page-intro p {
16 |     font-family: "IBM Plex Sans" !important;
17 |     font-size: var(--sl-text-h3) !important;
18 |     font-weight: 400 !important;
19 |     line-height: 1.5 !important;
20 |     margin: 0 0 1.334rem !important;
21 |     max-width: 90ch !important;
22 |     color: oklch(from var(--sl-color-gray-3) 0.55 c h) !important;
23 |   }
24 | </style>
25 | 
26 | <style>
27 |   .page-intro {
28 |     font-family: "IBM Plex Sans";
29 |     font-size: var(--sl-text-h3);
30 |     font-weight: 400;
31 |     line-height: 1.5;
32 |     margin: 0 0 1.334rem;
33 |     max-width: 90ch;
34 |     color: var(--sl-color-gray-3);
35 |   }
36 | </style>
37 | 


--------------------------------------------------------------------------------
/docs/src/components/Update.astro:
--------------------------------------------------------------------------------
 1 | ---
 2 | interface Props {
 3 |   label: string;
 4 | }
 5 | 
 6 | const { label } = Astro.props;
 7 | ---
 8 | 
 9 | <div class="update">
10 |   <div class="update-label">{label}</div>
11 |   <div class="update-content">
12 |     <slot />
13 |   </div>
14 | </div>
15 | 
16 | <style>
17 |   .update {
18 |     display: flex;
19 |     flex-direction: row;
20 |     margin-bottom: 3rem;
21 |     padding-bottom: 3rem;
22 |     border-bottom: 1px solid var(--sl-color-hairline);
23 |   }
24 | 
25 |   .update:last-child {
26 |     border-bottom: none;
27 |   }
28 | 
29 |   .update-label {
30 |     width: 128px;
31 |     font-size: var(--sl-text-sm);
32 |     color: var(--sl-color-gray-3);
33 |     margin-right: 3rem;
34 |   }
35 | 
36 |   .update-content {
37 |     flex: 1;
38 |     margin-block-start: 0 !important;
39 |     margin-top: 0 !important;
40 | 
41 |     :global(h2) {
42 |       margin-top: 0;
43 |     }
44 | 
45 |     :global(img) {
46 |       border-radius: 0.5rem;
47 |       margin: 1.5rem 0;
48 |       max-width: 100%;
49 |     }
50 |   }
51 | </style>
52 | 


--------------------------------------------------------------------------------
/docs/src/content/config.ts:
--------------------------------------------------------------------------------
1 | import { defineCollection } from 'astro:content';
2 | import { docsSchema } from '@astrojs/starlight/schema';
3 | 
4 | export const collections = {
5 | 	docs: defineCollection({ schema: docsSchema() }),
6 | };
7 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/README.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Documentation
 3 | description: Documentation for kit.
 4 | ---
 5 | 
 6 | This uses [Starlight](https://starlight.astro.build) to build the documentation.
 7 | 
 8 | ## 🧞 Commands
 9 | 
10 | All commands are run from the root of the project, from a terminal:
11 | 
12 | | Command                   | Action                                           |
13 | | :------------------------ | :----------------------------------------------- |
14 | | `pnpm install`             | Installs dependencies                            |
15 | | `pnpm dev`             | Starts local dev server at `localhost:4321`      |
16 | | `pnpm build`           | Build your production site to `./dist/`          |
17 | | `pnpm preview`         | Preview your build locally, before deploying     |
18 | | `pnpm astro ...`       | Run CLI commands like `astro add`, `astro check` |
19 | | `pnpm astro -- --help` | Get help using the Astro CLI                     |
20 | 
21 | ## 👀 Want to learn more?
22 | 
23 | Check out [Starlight’s docs](https://starlight.astro.build/), read [the Astro documentation](https://docs.astro.build), or jump into the [Astro Discord server](https://astro.build/chat).
24 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/api/code_searcher.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: CodeSearcher API
 3 | ---
 4 | 
 5 | import { Aside } from '@astrojs/starlight/components';
 6 | 
 7 | This page details the API for the `CodeSearcher` class, used for performing text and regular expression searches across your repository.
 8 | 
 9 | ## Initialization
10 | 
11 | To use the `CodeSearcher`, you first need to initialize it with the path to your repository:
12 | 
13 | ```python
14 | from kit.code_searcher import CodeSearcher
15 | 
16 | searcher = CodeSearcher(repo_path="/path/to/your/repo")
17 | # Or, if you have a kit.Repository object:
18 | searcher = repo.get_code_searcher()
19 | ```
20 | 
21 | <Aside type="note">
22 |   If you are using the `kit.Repository` object, you can obtain a `CodeSearcher` instance via `repo.get_code_searcher()` which comes pre-configured with the repository path.
23 | </Aside>
24 | 
25 | ## `SearchOptions` Dataclass
26 | 
27 | The `search_text` method uses a `SearchOptions` dataclass to control search behavior. You can import it from `kit.code_searcher`.
28 | 
29 | ```python
30 | from kit.code_searcher import SearchOptions
31 | ```
32 | 
33 | **Fields:**
34 | 
35 | *   `case_sensitive` (bool): 
36 |     *   If `True` (default), the search query is case-sensitive.
37 |     *   If `False`, the search is case-insensitive.
38 | *   `context_lines_before` (int):
39 |     *   The number of lines to include before each matching line. Defaults to `0`.
40 | *   `context_lines_after` (int):
41 |     *   The number of lines to include after each matching line. Defaults to `0`.
42 | *   `use_gitignore` (bool):
43 |     *   If `True` (default), files and directories listed in the repository's `.gitignore` file will be excluded from the search.
44 |     *   If `False`, `.gitignore` rules are ignored.
45 | 
46 | ## Methods
47 | 
48 | ### `search_text(query: str, file_pattern: str = "*.py", options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]`
49 | 
50 | Searches for a text pattern (which can be a regular expression) in files matching the `file_pattern`.
51 | 
52 | *   **Parameters:**
53 |     *   `query` (str): The text pattern or regular expression to search for.
54 |     *   `file_pattern` (str): A glob pattern specifying which files to search in. Defaults to `"*.py"` (all Python files).
55 |     *   `options` (Optional[SearchOptions]): An instance of `SearchOptions` to customize search behavior. If `None`, default options are used.
56 | *   **Returns:**
57 |     *   `List[Dict[str, Any]]`: A list of dictionaries, where each dictionary represents a match and contains:
58 |         *   `"file"` (str): The relative path to the file from the repository root.
59 |         *   `"line_number"` (int): The 1-indexed line number where the match occurred.
60 |         *   `"line"` (str): The content of the matching line (with trailing newline stripped).
61 |         *   `"context_before"` (List[str]): A list of strings, each being a line of context before the match.
62 |         *   `"context_after"` (List[str]): A list of strings, each being a line of context after the match.
63 | *   **Raises:**
64 |     *   The method includes basic error handling for file operations and will print an error message to the console if a specific file cannot be processed, then continue with other files.
65 | 
66 | **Example Usage:**
67 | 
68 | ```python
69 | from kit.code_searcher import CodeSearcher, SearchOptions
70 | 
71 | # Assuming 'searcher' is an initialized CodeSearcher instance
72 | 
73 | # Basic search for 'my_function' in Python files
74 | results_basic = searcher.search_text("my_function")
75 | 
76 | # Case-insensitive search with 2 lines of context before and after
77 | custom_options = SearchOptions(
78 |     case_sensitive=False,
79 |     context_lines_before=2,
80 |     context_lines_after=2
81 | )
82 | results_with_options = searcher.search_text(
83 |     query=r"my_variable\s*=\s*\d+", # Example regex query
84 |     file_pattern="*.txt",
85 |     options=custom_options
86 | )
87 | 
88 | for match in results_with_options:
89 |     print(f"Found in {match['file']} at line {match['line_number']}:")
90 |     for before_line in match['context_before']:
91 |         print(f"  {before_line}")
92 |     print(f"> {match['line']}")
93 |     for after_line in match['context_after']:
94 |         print(f"  {after_line}")
95 |     print("---")
96 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/api/summarizer.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Summarizer API
 3 | ---
 4 | 
 5 | import { Aside } from '@astrojs/starlight/components';
 6 | 
 7 | This page details the API for the `Summarizer` class, used for interacting with LLMs for code summarization tasks.
 8 | 
 9 | ## Initialization
10 | 
11 | Details on how to initialize the `Summarizer` (likely via `repo.get_summarizer()`).
12 | 
13 | <Aside type="note">
14 |   Typically, you obtain a `Summarizer` instance via `repo.get_summarizer()` rather than initializing it directly.
15 | </Aside>
16 | 
17 | ## Methods
18 | 
19 | ### `summarize_file(file_path: str) -> str`
20 | 
21 | Summarizes the content of the specified file.
22 | 
23 | *   **Parameters:**
24 |     *   `file_path` (str): The path to the file within the repository.
25 | *   **Returns:**
26 |     *   `str`: The summary generated by the LLM.
27 | *   **Raises:**
28 |     *   `FileNotFoundError`: If the `file_path` does not exist in the repo.
29 |     *   `LLMError`: If there's an issue communicating with the LLM.
30 | 
31 | 
32 | ### `summarize_function(file_path: str, function_name: str) -> str`
33 | 
34 | Summarizes a specific function within the specified file.
35 | 
36 | *   **Parameters:**
37 |     *   `file_path` (str): The path to the file containing the function.
38 |     *   `function_name` (str): The name of the function to summarize.
39 | *   **Returns:**
40 |     *   `str`: The summary generated by the LLM.
41 | *   **Raises:**
42 |     *   `FileNotFoundError`: If the `file_path` does not exist in the repo.
43 |     *   `SymbolNotFoundError`: If the function cannot be found in the file.
44 |     *   `LLMError`: If there's an issue communicating with the LLM.
45 | 
46 | ### `summarize_class(file_path: str, class_name: str) -> str`
47 | 
48 | Summarizes a specific class within the specified file.
49 | 
50 | *   **Parameters:**
51 |     *   `file_path` (str): The path to the file containing the class.
52 |     *   `class_name` (str): The name of the class to summarize.
53 | *   **Returns:**
54 |     *   `str`: The summary generated by the LLM.
55 | *   **Raises:**
56 |     *   `FileNotFoundError`: If the `file_path` does not exist in the repo.
57 |     *   `SymbolNotFoundError`: If the class cannot be found in the file.
58 |     *   `LLMError`: If there's an issue communicating with the LLM.
59 | 
60 | ## Configuration
61 | 
62 | Details on the configuration options (`OpenAIConfig`, etc.).
63 | This is typically handled when calling `repo.get_summarizer(config=...)` or via environment variables read by the default `OpenAIConfig`.
64 | 
65 | The `Summarizer` currently uses `OpenAIConfig` for its LLM settings. When a `Summarizer` is initialized without a specific config object, it creates a default `OpenAIConfig` with the following parameters:
66 | 
67 | *   `api_key` (str, optional): Your OpenAI API key. Defaults to the `OPENAI_API_KEY` environment variable. If not found, an error will be raised.
68 | *   `model` (str): The OpenAI model to use. Defaults to `"gpt-4o"`.
69 | *   `temperature` (float): Sampling temperature for the LLM. Defaults to `0.7`.
70 | *   `max_tokens` (int): The maximum number of tokens to generate in the summary. Defaults to `1000`.
71 | 
72 | You can customize this by creating an `OpenAIConfig` instance and passing it to `repo.get_summarizer()`:
73 | 
74 | ```python
75 | from kit.summaries import OpenAIConfig
76 | 
77 | # Example: Customize model and temperature
78 | my_config = OpenAIConfig(model="o3-mini", temperature=0.2)
79 | summarizer = repo.get_summarizer(config=my_config)
80 | 
81 | # Now summarizer will use o3-mini with temperature 0.2
82 | summary = summarizer.summarize_file("path/to/your/file.py")
83 | ```
84 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/api/summary-searcher.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: SummarySearcher API
 3 | description: API documentation for the SummarySearcher class.
 4 | ---
 5 | 
 6 | The `SummarySearcher` class provides a simple way to query an index built by [`DocstringIndexer`](/api/docstring-indexer). It takes a search query, embeds it using the same embedding function used for indexing, and retrieves the most semantically similar summaries from the vector database.
 7 | 
 8 | ## Constructor
 9 | 
10 | **Class: `SummarySearcher`**
11 | *(defined in `kit/docstring_indexer.py`)*
12 | 
13 | The `SummarySearcher` is typically initialized with an instance of `DocstringIndexer`. It uses the `DocstringIndexer`'s configured backend and embedding function to perform searches.
14 | 
15 | ```python
16 | from kit.docstring_indexer import DocstringIndexer, SummarySearcher
17 | 
18 | # Assuming 'indexer' is an already initialized DocstringIndexer instance
19 | # indexer = DocstringIndexer(repo=my_repo, summarizer=my_summarizer)
20 | # indexer.build() # Ensure the index is built
21 | 
22 | searcher = SummarySearcher(indexer=indexer)
23 | ```
24 | 
25 | **Parameters:**
26 | 
27 | *   **`indexer`** (`DocstringIndexer`, required):
28 |     An instance of `DocstringIndexer` that has been configured and preferably has had its `build()` method called. The `SummarySearcher` will use this indexer's `backend` and `embed_fn`. See the [`DocstringIndexer API docs`](./docstring-indexer) for more details on the indexer.
29 | 
30 | ## Methods
31 | 
32 | ### `search`
33 | 
34 | **Method: `SummarySearcher.search`**
35 | *(defined in `kit/docstring_indexer.py`)*
36 | 
37 | Embeds the given `query` string and searches the vector database (via the indexer's backend) for the `top_k` most similar document summaries.
38 | 
39 | ```python
40 | query_text = "How is user authentication handled?"
41 | results = searcher.search(query=query_text, top_k=3)
42 | 
43 | for result in results:
44 |     print(f"Found in: {result.get('file_path')} ({result.get('symbol_name')})")
45 |     print(f"Score: {result.get('score')}")
46 |     print(f"Summary: {result.get('summary')}")
47 |     print("----")} 
48 | ```
49 | 
50 | **Parameters:**
51 | 
52 | *   **`query`** (`str`, required):
53 |     The natural language query string to search for.
54 | *   **`top_k`** (`int`, default: `5`):
55 |     The maximum number of search results to return.
56 | 
57 | **Returns:** `List[Dict[str, Any]]`
58 | 
59 |     A list of dictionaries, where each dictionary represents a search hit.
60 |     Each hit typically includes metadata, a score, an ID, and the summary text.
61 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/changelog.mdx:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Changelog"
  3 | description: "Track changes and improvements in Kit releases"
  4 | ---
  5 | 
  6 | # Changelog
  7 | 
  8 | All notable changes to Kit will be documented in this file.
  9 | 
 10 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 11 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 12 | 
 13 | ## [0.6.3]
 14 | 
 15 | ### 🐛 Bug Fixes
 16 | 
 17 | - **Symbol Type Extraction Fix**: Fixed bug where some symbol types were incorrectly processed
 18 |   - Classes and other symbol types no longer have characters incorrectly stripped
 19 |   - Added comprehensive test coverage for symbol type processing edge cases
 20 | 
 21 | ---
 22 | 
 23 | ## [0.6.2]
 24 | 
 25 | ### 🎉 Major Features
 26 | 
 27 | - **Ollama Support**: Complete local LLM inference support with Ollama
 28 |   - Zero-cost PR reviews with local models
 29 |   - Support for popular models like DeepSeek R1, Qwen2.5-coder, CodeLlama
 30 |   - Automatic provider detection from model names (e.g., `deepseek-r1:latest`)
 31 |   - First-class integration with kit's repository intelligence
 32 | 
 33 | - **DeepSeek R1 Reasoning Model Support**
 34 |   - **Thinking Token Stripping**: Automatically removes `<think>...</think>` tags from reasoning models
 35 |   - Clean, professional output without internal reasoning clutter
 36 |   - Preserves the analytical capabilities while improving output quality
 37 |   - Works in both summarization and PR review workflows
 38 | 
 39 | - **Plain Output Mode**: New `--plain` / `-p` flag for pipe-friendly output
 40 |   - Removes all formatting and status messages
 41 |   - Perfect for piping to Claude Code or other AI tools
 42 |   - Enables powerful multi-stage AI workflows (e.g., `kit review -p | claude`)
 43 |   - Quiet mode suppresses all progress/status output
 44 | 
 45 | ### ✨ Enhanced Features
 46 | 
 47 | - **CLI Improvements**
 48 |   - Added `--version` flag to display current kit version
 49 |   - Model override support: `--model` / `-m` flag for per-review model selection
 50 |   - Better error messages and help text
 51 | 
 52 | - **Documentation**
 53 |   - Comprehensive Ollama integration guides
 54 |   - Claude Code workflow examples
 55 |   - Multi-stage AI analysis patterns
 56 |   - Updated CLI reference with new flags
 57 | 
 58 | ### 🔧 Developer Experience
 59 | 
 60 | - **Community**
 61 |   - Added Discord community server for support and discussions
 62 |   - Improved README with better getting started instructions
 63 | 
 64 | - **Testing**
 65 |   - Comprehensive test suite for thinking token stripping
 66 |   - Ollama integration tests with mock scenarios
 67 |   - PR reviewer test coverage for new features
 68 | 
 69 | ### 💰 Cost Optimization
 70 | 
 71 | - **Free Local Analysis**: Use Ollama for zero-cost code analysis
 72 | - **Hybrid Workflows**: Combine free local analysis with premium cloud implementation
 73 | - **Provider Switching**: Automatic provider detection and switching
 74 | 
 75 | ---
 76 | 
 77 | ## [0.6.1]
 78 | 
 79 | ### 🔧 Improvements
 80 | 
 81 | - Enhanced line number accuracy in PR reviews
 82 | - Improved debug output for troubleshooting
 83 | - Better test coverage for core functionality
 84 | - Performance optimizations for large repositories
 85 | 
 86 | ### 🐛 Bug Fixes
 87 | 
 88 | - Fixed edge cases in symbol extraction
 89 | - Improved error handling for malformed diffs
 90 | - Better validation for GitHub URLs
 91 | 
 92 | ---
 93 | 
 94 | ## [0.6.0]
 95 | 
 96 | ### 🎉 Major Features
 97 | 
 98 | - Advanced PR reviews
 99 | - Enhanced line number context and accuracy fore reviews
100 | - Comprehensive cost tracking and pricing updates for reviews
101 | - Improved repository intelligence with better symbol analysis
102 | 
103 | ### ✨ Enhanced Features
104 | 
105 | - Better diff parsing and analysis
106 | - Enhanced file prioritization algorithms for reviews
107 | - Improved cost breakdown reporting
108 | 
109 | ---
110 | 
111 | ## Links
112 | 
113 | - [GitHub Releases](https://github.com/cased/kit/releases)
114 | - [Issues](https://github.com/cased/kit/issues)


--------------------------------------------------------------------------------
/docs/src/content/docs/core-concepts/context-assembly.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Assembling Context
 3 | ---
 4 | 
 5 | When you send code to an LLM you usually **don’t** want the entire repository –
 6 | just the *most relevant* bits.  `ContextAssembler` helps you stitch those bits
 7 | together into a single prompt-sized string.
 8 | 
 9 | ## Why you need it
10 | 
11 | * **Token limits** – GPT-4o tops out at ~128k tokens; some models less.
12 | * **Signal-to-noise** – Cut boilerplate, focus the model on what matters.
13 | * **Automatic truncation** – Keeps prompts within your chosen character budget.
14 | 
15 | ## Quick start
16 | 
17 | ```python
18 | from kit import Repository, ContextAssembler
19 | 
20 | repo = Repository("/path/to/project")
21 | 
22 | # Assume you already have chunks, e.g. from repo.search_semantic()
23 | chunks = repo.search_text("jwt decode")
24 | 
25 | assembler = ContextAssembler(max_chars=12_000)
26 | context = assembler.from_chunks(chunks)
27 | 
28 | print(context)  # → Ready to drop into your chat prompt
29 | ```
30 | 
31 | `chunks` can be any list of dicts that include a `code` key – the helper trims
32 | and orders them by length until the budget is filled.
33 | 
34 | ### Fine-tuning
35 | 
36 | | Parameter | Default | Description |
37 | |-----------|---------|-------------|
38 | | `max_chars` | `12000` | Rough character cap for the final string. |
39 | | `separator` | `"\n\n---\n\n"` | Separator inserted between chunks. |
40 | | `header` / `footer` | `""` | Optional strings prepended/appended. |
41 | 
42 | ```python
43 | assembler = ContextAssembler(
44 |     max_chars=8000,
45 |     header="### Code context\n",
46 |     footer="\n### End context",
47 | )
48 | ```
49 | 
50 | ## Combining with other tools
51 | 
52 | 1. **Vector search → assemble → chat**
53 |    ```python
54 |    chunks = repo.search_semantic("retry backoff", embed_fn, top_k=10)
55 |    prompt = assembler.from_chunks(chunks)
56 |    response = my_llm.chat(prompt + "\n\nQ: …")
57 |    ```
58 | 2. **Docstring search first** – Use `SummarySearcher` for high-level matches,
59 |    then pull full code for those files via `repo.context`.  
60 | 3. **Diff review bots** – Feed only the changed lines + surrounding context.
61 | 
62 | ## API reference
63 | 
64 | ```python
65 | from kit.llm_context import ContextAssembler
66 | ```
67 | 
68 | ### `__init__(repo, *, title=None)`
69 | 
70 | Constructs a new `ContextAssembler`.
71 | 
72 | *   `repo`: A `kit.repository.Repository` instance.
73 | *   `title` (optional): A string to prepend to the assembled context.
74 | 
75 | ### `from_chunks(chunks, max_chars=12000, separator="...", header="", footer="")`
76 | 
77 | This is the primary method for assembling context from a list of code chunks.
78 | 
79 | *   `chunks`: A list of dictionaries, each with a `"code"` key.
80 | *   `max_chars`: Target maximum character length for the output string.
81 | *   `separator`: String to insert between chunks.
82 | *   `header` / `footer`: Optional strings to wrap the entire context.
83 | 
84 | Returns a single string with concatenated, truncated chunks.
85 | 
86 | ### Other methods
87 | 
88 | While `from_chunks` is the most common entry point, `ContextAssembler` also offers methods to add specific types of context if you're building a prompt manually:
89 | 
90 | *   `add_diff(diff_text)`: Adds a Git diff.
91 | *   `add_file(file_path, highlight_changes=False)`: Adds the full content of a file.
92 | *   `add_symbol_dependencies(file_path, max_depth=1)`: Adds content of files that `file_path` depends on.
93 | *   `add_search_results(results, query)`: Formats and adds semantic search results.
94 | *   `format_context()`: Returns the accumulated context as a string.
95 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/core-concepts/repository-api.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: The Repository Interface
 3 | ---
 4 | 
 5 | import { Aside } from "@astrojs/starlight/components";
 6 | 
 7 | The `kit.Repository` object is the backbone of the library. It serves as your primary interface for accessing, analyzing, and understanding codebases, regardless of their language or location (local path or remote Git URL).
 8 | 
 9 | ## Why the `Repository` Object?
10 | 
11 | Interacting directly with code across different languages, file structures, and potential locations (local vs. remote) can be cumbersome. The `Repository` object provides a **unified and consistent abstraction layer** to handle this complexity.
12 | 
13 | Key benefits include:
14 | 
15 | - **Unified Access:** Provides a single entry point to read files, extract code structures (symbols), perform searches, and more.
16 | - **Location Agnostic:** Works seamlessly with both local file paths and remote Git repository URLs (handling cloning and caching automatically when needed).
17 | - **Language Abstraction:** Leverages `tree-sitter` parsers under the hood to understand the syntax of various programming languages, allowing you to work with symbols (functions, classes, etc.) in a standardized way.
18 | - **Foundation for Tools:** Acts as the foundation upon which you can build higher-level developer tools and workflows, such as documentation generators, AI code reviewers, or semantic search engines.
19 | 
20 | ## What Can You Do with a `Repository`?
21 | 
22 | Once you instantiate a `Repository` object pointing to your target codebase:
23 | 
24 | ```python
25 | from kit import Repository
26 | 
27 | # Point to a local project
28 | my_repo = Repository("/path/to/local/project")
29 | 
30 | # Or point to a remote GitHub repo
31 | # github_repo = Repository("https://github.com/owner/repo-name")
32 | 
33 | # Or analyze a specific version
34 | # versioned_repo = Repository("https://github.com/owner/repo-name", ref="v1.2.3")
35 | ```
36 | 
37 | You can perform various code intelligence tasks:
38 | 
39 | - **Explore Structure:** Get the file tree (`.get_file_tree()`).
40 | - **Read Content:** Access the raw content of specific files (`.get_file_content()`).
41 | - **Understand Code:** Extract detailed information about functions, classes, and other symbols (`.extract_symbols()`).
42 | - **Access Git Metadata:** Get current commit SHA, branch, and remote URL (`.current_sha`, `.current_branch`, `.remote_url`).
43 | - **Search & Navigate:** Find text patterns (`.search_text()`) or semantically similar code (`.search_semantic()`).
44 | - **Analyze Dependencies:** Find where symbols are defined and used (`.find_symbol_usages()`).
45 | - **Prepare for LLMs:** Chunk code intelligently by lines or symbols (`.chunk_file_by_lines()`, `.chunk_file_by_symbols()`) and get code context around specific lines (`.extract_context_around_line()`).
46 | - **Integrate with AI:** Obtain configured summarizers (`.get_summarizer()`) or vector searchers (`.get_vector_searcher()`) for advanced AI workflows.
47 | - **Export Data:** Save the file tree, symbol information, or full repository index to structured formats like JSON (`.write_index()`, `.write_symbols()`, etc.).
48 | 
49 | The following table lists some of the key classes and tools you can access through the `Repository` object:
50 | 
51 | | Class/Tool         | Description                                    |
52 | | ------------------ | ---------------------------------------------- |
53 | | `Summarizer`       | Generate summaries of code using LLMs          |
54 | | `VectorSearcher`   | Query vector index of code for semantic search |
55 | | `DocstringIndexer` | Build vector index of LLM-generated summaries  |
56 | | `SummarySearcher`  | Query that index                               |
57 | 
58 | 
59 | <Aside type="tip">
60 |   For a complete list of methods, parameters, and detailed usage examples,
61 |   please refer to the **[Repository Class API Reference](/api/repository/)**.
62 | </Aside>
63 | 
64 | 
65 | <Aside type="note">
66 | ## File and Directory Exclusion (.gitignore support)
67 | 
68 | By default, kit automatically ignores files and directories listed in your `.gitignore` as well as `.git/` and its contents. This ensures your indexes, symbol extraction, and searches do not include build artifacts, dependencies, or version control internals.
69 | 
70 | **Override:**
71 | - This behavior is the default. If you want to include ignored files, you can override this by modifying the `RepoMapper` logic (see `src/kit/repo_mapper.py`) or subclassing it with custom exclusion rules.
72 | </Aside>
73 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/core-concepts/search-approaches.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Searching
 3 | ---
 4 | 
 5 | Not sure **which `kit` feature to reach for**?  Use this page as a mental map of
 6 | search-and-discovery tools – from plain-text grep all the way to LLM-powered
 7 | semantic retrieval.
 8 | 
 9 | ## Decision table
10 | 
11 | | Your goal | Best tool | One-liner | Docs |
12 | |-----------|-----------|-----------|------|
13 | | Find an exact string or regex | `repo.search_text()` | `repo.search_text("JWT", "*.go")` | [Text search](/docs/core-concepts/semantic-search#exact-keyword) |
14 | | List symbols in a file | `repo.extract_symbols()` | `repo.extract_symbols("src/db.py")` | [Repository API](/docs/core-concepts/repository-api) |
15 | | See where a function is used | `repo.find_symbol_usages()` | `repo.find_symbol_usages("login")` | ^ |
16 | | Get a concise overview of a file / function | `Summarizer` | `summarizer.summarize_file(path)` | [Code summarization](/docs/core-concepts/code-summarization) |
17 | | Semantic search over **raw code chunks** | `VectorSearcher` | `repo.search_semantic()` | [Semantic search](/docs/core-concepts/semantic-search) |
18 | | Semantic search over **LLM summaries** | `DocstringIndexer` + `SummarySearcher` | see below | [Docstring index](/docs/core-concepts/docstring-indexing) |
19 | | Build an LLM prompt with only the *relevant* code | `ContextAssembler` | `assembler.from_chunks(chunks)` | [Context assembly](/docs/core-concepts/context-assembly) |
20 | 
21 | > **Tip:** You can mix-and-match. For instance, run a docstring search first,
22 | > then feed the matching files into `ContextAssembler` for an LLM chat.
23 | 
24 | ## Approaches in detail
25 | 
26 | ### 1. Plain-text / regex search
27 | 
28 | Fast, zero-setup, works everywhere. Use when you *know* what string you’re
29 | looking for.
30 | 
31 | ```python
32 | repo.search_text("parse_jwt", file_pattern="*.py")
33 | ```
34 | 
35 | ### 2. Symbol indexing
36 | 
37 | `extract_symbols()` uses **tree-sitter** queries (Python, JS, Go, etc.) to list
38 | functions, classes, variables – handy for nav trees or refactoring tools.
39 | 
40 | ### 3. LLM summarization
41 | 
42 | Generate natural-language summaries for files, classes, or functions with
43 | `Summarizer`.  Great for onboarding or API docs.
44 | 
45 | ### 4. Vector search (raw code)
46 | 
47 | `VectorSearcher` chunks code (symbols or lines) → embeds chunks → stores them in
48 | a local vector database.  Good when wording of the query is *similar* to the
49 | code.
50 | 
51 | ### 5. Docstring vector search
52 | 
53 | `DocstringIndexer` first *summarizes* code, then embeds the summary.  The
54 | resulting vectors capture **intent**, not syntax; queries like “retry back-off
55 | logic” match even if the code uses exponential delays without those words.
56 | 
57 | ---
58 | 
59 | Still unsure?  Start with text-search (cheap), move to vector search (smart),
60 | and layer summaries when you need *meaning* over *matching*.
61 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/development/running-tests.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Running Tests
 3 | ---
 4 | 
 5 | To run tests using uv and pytest, first ensure you have the development dependencies installed:
 6 | 
 7 | ```sh
 8 | # Install all deps
 9 | uv pip install -e .
10 | ```
11 | 
12 | Then, run the full test suite using:
13 | 
14 | ```sh
15 | uv run pytest
16 | ```
17 | 
18 | Or to run a specific test file:
19 | 
20 | ```sh
21 | uv run pytest tests/test_hcl_symbols.py
22 | ```
23 | 
24 | ## Code Style and Formatting
25 | 
26 | Kit uses [Ruff](https://docs.astral.sh/ruff/) for linting, formatting, and import sorting with a line length of 120 characters. Our configuration can be found in `pyproject.toml`.
27 | 
28 | To check your code against our style guidelines:
29 | 
30 | ```sh
31 | # Run linting checks
32 | ruff check .
33 | 
34 | # Check format (doesn't modify files)
35 | ruff format --check .
36 | ```
37 | 
38 | To automatically fix linting issues and format your code:
39 | 
40 | ```sh
41 | # Fix linting issues
42 | ruff check --fix .
43 | 
44 | # Format code
45 | ruff format .
46 | ```
47 | 
48 | These checks are enforced in CI, so we recommend running them locally before pushing changes.
49 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/extending/adding-languages.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Adding New Languages
 3 | ---
 4 | 
 5 | - To add a new language:
 6 |   1. Add a tree-sitter grammar and build it (see [tree-sitter docs](https://tree-sitter.github.io/tree-sitter/creating-parsers)).
 7 |   2. Add a `queries/<lang>/tags.scm` file with queries for symbols you want to extract.
 8 |   3. Add the file extension to `TreeSitterSymbolExtractor.LANGUAGES`.
 9 |   4. Write/expand tests for the new language.
10 | 
11 | **Why?**
12 | - This approach lets you support any language with a tree-sitter grammar—no need to change core logic.
13 | - `tags.scm` queries make symbol extraction flexible and community-driven.
14 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/introduction/overview.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Overview
 3 | ---
 4 | 
 5 | ## kit: Code Intelligence Toolkit
 6 | 
 7 | A modular, production-grade toolkit for codebase mapping, symbol extraction, code search, and LLM-powered developer workflows. Supports multi-language codebases via `tree-sitter`.
 8 | 
 9 | `kit` features a "mid-level API" to build your own custom tools, applications, agents, and workflows: easily build code review bots, semantic code search, documentation generators, and more.
10 | 
11 | `kit` is **free and open source** with a permissive MIT license. Check it out on [GitHub](https://github.com/cased/kit).
12 | 
13 | ## Installation
14 | ### Install from PyPI
15 | ```bash
16 | # Basic installation (includes PR reviewer, no ML dependencies)
17 | pip install cased-kit
18 | 
19 | # With semantic search features (includes PyTorch, sentence-transformers)
20 | pip install cased-kit[ml]
21 | 
22 | # Everything (all features)
23 | pip install cased-kit[all]
24 | ```
25 | 
26 | ### Install from Source
27 | ```bash
28 | git clone https://github.com/cased/kit.git
29 | cd kit
30 | uv venv .venv
31 | source .venv/bin/activate
32 | uv pip install -e .
33 | ```
34 | 
35 | ## Why Use kit?
36 | 
37 | `kit` helps with:
38 | 
39 | *   **Unifying Code Access:** Provides a single, consistent `Repository` object to interact with files, symbols, and search across diverse codebases, regardless of language.
40 | *   **Deep Code Understanding:** Leverages `tree-sitter` for accurate, language-specific parsing, enabling reliable symbol extraction and structural analysis across an entire codebase.
41 | *   **Bridging Code and LLMs:** Offers tools specifically designed to chunk code effectively and retrieve relevant context for large language models, powering smarter AI developer tools.
42 | 
43 | ## Core Philosophy
44 | 
45 | `kit` aims to be a **toolkit** for building applications, agents, and workflows.
46 | It handles the low-level parsing and indexing complexity, and allows you to adapt these components to your specific needs.
47 | 
48 | We believe the building blocks for code intelligence and LLM workflows for developer tools should be free and open source,
49 | so you can build amazing products and experiences.
50 | 
51 | 
52 | ## Where to Go Next
53 | 
54 | *   **Dive into the API:** Explore the [Core Concepts](/core-concepts/repository-api) to understand the `Repository` object and its capabilities.
55 | *   **Build Something:** Follow the [Tutorials](/tutorials/ai_pr_reviewer) for step-by-step guides on creating practical tools.
56 | 
57 | ## LLM Documentation
58 | 
59 | This documentation site provides generated text files suitable for LLM consumption:
60 | 
61 | - [`/llms.txt`](/llms.txt): Entrypoint file following the llms.txt standard.
62 | - [`/llms-full.txt`](/llms-full.txt): Complete documentation content concatenated into a single file.
63 | - [`/llms-small.txt`](/llms-small.txt): Minified documentation content for models with smaller context windows.
64 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/introduction/quickstart.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Quickstart
 3 | ---
 4 | 
 5 | ```bash
 6 | git clone https://github.com/cased/kit.git
 7 | cd kit
 8 | uv venv .venv
 9 | source .venv/bin/activate
10 | uv pip install -e .
11 | ```
12 | 
13 | Now, you can use kit!
14 | kit ships with a demonstration repository at `tests/fixtures/` you can use to get started. 
15 | 
16 | Try this simple Python script (e.g., save as `test_kit.py` in the `kit` directory you cloned):
17 | 
18 | ```python
19 | import kit
20 | import os
21 | 
22 | # Path to the demo repository
23 | repo_path = "tests/fixtures/realistic_repo"
24 | 
25 | print(f"Loading repository at: {repo_path}")
26 | # Ensure you have cloned the 'kit' repository and are in its root directory
27 | # for this relative path to work correctly.
28 | repo = kit.Repository(repo_path)
29 | 
30 | # Print the first 5 Python files found in the demo repo
31 | print("\nFound Python files in the demo repo (first 5):")
32 | count = 0
33 | for file in repo.files('*.py'):
34 |     print(f"- {file.path}")
35 |     count += 1
36 |     if count >= 5:
37 |         break
38 | 
39 | if count == 0:
40 |     print("No Python files found in the demo repository.")
41 | 
42 | # Extract symbols from a specific file in the demo repo (e.g., app.py)
43 | target_file = 'app.py'
44 | print(f"\nExtracting symbols from {target_file} in the demo repo (first 5):")
45 | try:
46 |     symbols = repo.extract_symbols(target_file)
47 |     if symbols:
48 |         for i, symbol in enumerate(symbols):
49 |             print(f"- {symbol.name} ({symbol.kind}) at line {symbol.range.start.line}")
50 |             if i >= 4:
51 |                 break
52 |     else:
53 |         print(f"No symbols found or file not parseable: {target_file}")
54 | except FileNotFoundError:
55 |     print(f"File not found: {target_file}")
56 | except Exception as e:
57 |     print(f"An error occurred extracting symbols: {e}")
58 | 
59 | ```
60 | 
61 | Run it with `python test_kit.py`.
62 | 
63 | Next, explore the [Usage Guide](/introduction/usage-guide) to understand the core concepts.
64 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/mcp/using-kit-with-mcp.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Using kit with MCP
 3 | description: Learn how to use kit with the Model Context Protocol (MCP) for AI-powered code understanding
 4 | ---
 5 | 
 6 | Note: MCP support is currently in alpha.
 7 | 
 8 | The Model Context Protocol (MCP) provides a unified API for codebase operations, making it easy to integrate kit's capabilities with AI tools and IDEs. This guide will help you set up and use kit with MCP.
 9 | 
10 | Kit provides a MCP server implementation that exposes its code intelligence capabilities through a standardized protocol. When using kit as an MCP server, you gain access to:
11 | 
12 | - **Code Search**: Perform text-based and semantic code searches
13 | - **Code Analysis**: Extract symbols, find symbol usages, and analyze dependencies
14 | - **Code Summarization**: Create natural language summaries of code
15 | - **File Navigation**: Explore file trees and repository structure
16 | 
17 | This document guides you through setting up and using `kit` with MCP-compatible tools like Cursor or Claude Desktop.
18 | 
19 | ## What is MCP?
20 | 
21 | MCP (Model Context Protocol) is a specification that allows AI agents and development tools to interact with your codebase programmatically via a local server. `kit` implements an MCP server to expose its code intelligence features.
22 | 
23 | ## Available MCP Tools in `kit`
24 | 
25 | Currently, `kit` exposes the following functionalities via MCP tools:
26 | 
27 | *   `open_repository`: Opens a local or remote Git repository. Supports `ref` parameter for specific commits, tags, or branches.
28 | *   `get_file_tree`: Retrieves the file and directory structure of the open repository.
29 | *   `get_file_content`: Reads the content of a specific file.
30 | *   `search_code`: Performs text-based search across repository files.
31 | *   `extract_symbols`: Extracts functions, classes, and other symbols from a file.
32 | *   `find_symbol_usages`: Finds where a specific symbol is used across the repository.
33 | *   `get_code_summary`: Provides AI-generated summaries for files, functions, or classes.
34 | *   `get_git_info`: Retrieves git metadata including current SHA, branch, and remote URL.
35 | 
36 | ### Opening Repositories with Specific Versions
37 | 
38 | The `open_repository` tool supports analyzing specific versions of repositories using the optional `ref` parameter:
39 | 
40 | ```json
41 | {
42 |   "tool": "open_repository",
43 |   "arguments": {
44 |     "path_or_url": "https://github.com/owner/repo",
45 |     "ref": "v1.2.3"
46 |   }
47 | }
48 | ```
49 | 
50 | The `ref` parameter accepts:
51 | - **Commit SHAs**: `"abc123def456"`
52 | - **Tags**: `"v1.2.3"`, `"release-2024"`
53 | - **Branches**: `"main"`, `"develop"`, `"feature-branch"`
54 | 
55 | ### Accessing Git Metadata
56 | 
57 | Use the `get_git_info` tool to access repository metadata:
58 | 
59 | ```json
60 | {
61 |   "tool": "get_git_info",
62 |   "arguments": {
63 |     "repo_id": "your-repo-id"
64 |   }
65 | }
66 | ```
67 | 
68 | This returns information like current commit SHA, branch name, and remote URL - useful for understanding what version of code you're analyzing.
69 | 
70 | More MCP features are coming soon.
71 | 
72 | ## Setup
73 | 
74 | 1. After installing `kit`, configure your MCP-compatible client by adding a stanza like this to your settings:
75 | 
76 | Available environment variables for the `env` section:
77 | - `OPENAI_API_KEY`
78 | - `KIT_MCP_LOG_LEVEL`
79 | 
80 | ```json
81 | {
82 |   "mcpServers": {
83 |     "kit-mcp": {
84 |       "command": "python",
85 |       "args": ["-m", "kit.mcp"],
86 |       "env": {
87 |         "KIT_MCP_LOG_LEVEL": "DEBUG"
88 |       }
89 |     }
90 |   }
91 | }
92 | ```
93 | 
94 | The `python` executable invoked must be the one where `cased-kit` is installed.
95 | If you see `ModuleNotFoundError: No module named 'kit'`, ensure the Python
96 | interpreter your MCP client is using is the correct one.


--------------------------------------------------------------------------------
/docs/src/content/docs/tutorials/docstring_search.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Build a Docstring Search Engine
 3 | ---
 4 | 
 5 | In this tutorial you'll build a semantic search tool on top of `kit`
 6 | using **docstring-based indexing**. 
 7 | 
 8 | Why docstrings?  Summaries distill *intent* rather than syntax.  Embedding these
 9 | short natural-language strings lets the vector DB focus on meaning, giving you
10 | relevant hits even when the literal code differs (e.g., `retry()` vs
11 | `attempt_again()`).  It also keeps the index small (one embedding per file or
12 | symbol instead of dozens of raw-code chunks).
13 | 
14 | ---
15 | 
16 | ## 1. Install dependencies
17 | 
18 | ```bash
19 | uv pip install kit sentence-transformers chromadb
20 | ```
21 | 
22 | ## 2. Initialise a repo and summarizer
23 | 
24 | ```python
25 | import kit
26 | from kit import Repository, DocstringIndexer, Summarizer, SummarySearcher
27 | from sentence_transformers import SentenceTransformer
28 | 
29 | REPO_PATH = "/path/to/your/project"
30 | repo = Repository(REPO_PATH)
31 | 
32 | summarizer = repo.get_summarizer()  # defaults to OpenAIConfig
33 | ```
34 | 
35 | ## 3. Build the docstring index
36 | 
37 | ```python
38 | embed_model = SentenceTransformer("all-MiniLM-L6-v2")
39 | embed_fn = lambda txt: embed_model.encode(txt).tolist()
40 | 
41 | indexer = DocstringIndexer(repo, summarizer, embed_fn)
42 | indexer.build()          # writes REPO_PATH/.kit_cache/docstring_db
43 | ```
44 | 
45 | The first run will take time depending on repo size and LLM latency.
46 | Summaries are cached inside the vector DB (and in a meta.json within the persist_dir), 
47 | so subsequent runs are cheap if code hasn't changed.
48 | 
49 | ## 4. Query the index
50 | 
51 | ```python
52 | searcher = indexer.get_searcher()
53 | 
54 | results = searcher.search("How is the retry back-off implemented?", top_k=3)
55 | for hit in results:
56 |     print(f"→ File: {hit.get('file_path', 'N/A')}\n  Summary: {hit.get('summary', 'N/A')}")
57 | ```
58 | 
59 | You now have a semantic code searcher, using powerful docstring summaries,
60 | as easy as that.
61 | 
62 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/tutorials/dump_repo_map.mdx:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Dump Repo Map
  3 | ---
  4 | 
  5 | import { Aside } from '@astrojs/starlight/components';
  6 | 
  7 | This tutorial explains how to use `kit` to dump a complete map of your repository—including the file tree and all extracted symbols—as a JSON file. This is useful for further analysis, visualization, or integration with other tools. `kit` provides a convenient method on the `Repository` object to achieve this directly.
  8 | 
  9 | ## Step 1: Create the Script
 10 | 
 11 | Create a Python script named `dump_repo_map.py` with the following content. This script uses `argparse` to accept the repository path and the desired output file path.
 12 | 
 13 | ```python
 14 | # dump_repo_map.py
 15 | from kit import Repository # Import the main Repository class
 16 | import argparse
 17 | import sys
 18 | import os
 19 | 
 20 | def main():
 21 |     parser = argparse.ArgumentParser(description="Dump a repository's file tree and symbols as JSON using kit.")
 22 |     parser.add_argument("repo_path", help="Path to the repository directory.")
 23 |     parser.add_argument("output_file", help="Path to the output JSON file.")
 24 |     args = parser.parse_args()
 25 | 
 26 |     repo_path = args.repo_path
 27 |     if not os.path.isdir(repo_path):
 28 |         print(f"Error: Repository path not found or not a directory: {repo_path}", file=sys.stderr)
 29 |         sys.exit(1)
 30 | 
 31 |     try:
 32 |         print(f"Initializing repository at: {repo_path}", file=sys.stderr)
 33 |         repo = Repository(repo_path)
 34 |         
 35 |         print(f"Dumping repository index to: {args.output_file}", file=sys.stderr)
 36 |         repo.write_index(args.output_file) # Use the direct method
 37 |         
 38 |         print(f"Successfully wrote repository map to {args.output_file}", file=sys.stderr)
 39 |     except Exception as e:
 40 |         print(f"Error processing repository: {e}", file=sys.stderr)
 41 |         sys.exit(1)
 42 | 
 43 | if __name__ == "__main__":
 44 |     main()
 45 | ```
 46 | 
 47 | ---
 48 | 
 49 | ## Step 2: Run the Script
 50 | 
 51 | Save the code above as `dump_repo_map.py`. You can then run it from your terminal, providing the path to the repository you want to map and the desired output file name:
 52 | 
 53 | ```sh
 54 | python dump_repo_map.py /path/to/repo repo_map.json
 55 | ```
 56 | 
 57 | This will create a JSON file (e.g., `repo_map.json`) containing the structure and symbols of your codebase.
 58 | 
 59 | ---
 60 | 
 61 | ## Example JSON Output
 62 | 
 63 | The output JSON file will contain a `file_tree` (also aliased as `files`) and a `symbols` map.
 64 | 
 65 | ```json
 66 | {
 67 |   "file_tree": [
 68 |     {
 69 |       "path": "src",
 70 |       "is_dir": true,
 71 |       "name": "src",
 72 |       "size": 0
 73 |     },
 74 |     {
 75 |       "path": "src/main.py",
 76 |       "is_dir": false,
 77 |       "name": "main.py",
 78 |       "size": 1024
 79 |     },
 80 |     {
 81 |       "path": "README.md",
 82 |       "is_dir": false,
 83 |       "name": "README.md",
 84 |       "size": 2048
 85 |     }
 86 |     // ... more files and directories
 87 |   ],
 88 |   "files": [
 89 |     // ... same content as file_tree ...
 90 |   ],
 91 |   "symbols": {
 92 |     "src/main.py": [
 93 |       {
 94 |         "type": "function", 
 95 |         "name": "main", 
 96 |         "start_line": 10, 
 97 |         "end_line": 25, 
 98 |         "code": "def main():\n  pass"
 99 |       },
100 |       {
101 |         "type": "class", 
102 |         "name": "App", 
103 |         "start_line": 30, 
104 |         "end_line": 55
105 |       }
106 |     ],
107 |     "src/utils.py": [
108 |       {
109 |         "type": "function", 
110 |         "name": "helper", 
111 |         "start_line": 5, 
112 |         "end_line": 12
113 |       }
114 |     ]
115 |     // ... more files and their symbols
116 |   }
117 | }
118 | ```
119 | 
120 | <Aside type="note">
121 |   The exact content and structure of symbol information (e.g., inclusion of `code` snippets) depends on the `RepoMapper`'s symbol extraction capabilities for the specific languages in your repository.
122 | </Aside>
123 | 
124 | ---
125 | 
126 | ## Integration Ideas
127 | 
128 | - Use the JSON output to feed custom dashboards or documentation tools.
129 | - Integrate with code search or visualization tools.
130 | - Use for code audits, onboarding, or automated reporting.
131 | 
132 | ---
133 | 
134 | ## Conclusion
135 | 
136 | With `kit`, you can easily export a structured map of your repository using `repo.write_index()`, making this data readily available for various downstream use cases and custom tooling.
137 | 


--------------------------------------------------------------------------------
/docs/src/content/docs/tutorials/integrating_supersonic.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Integrating with Supersonic
 3 | description: Using kit for code analysis and Supersonic for automated PR creation.
 4 | ---
 5 | 
 6 | import { Aside } from '@astrojs/starlight/components';
 7 | 
 8 | `kit` excels at understanding and analyzing codebases, while [Supersonic](https://github.com/cased/supersonic) provides a high-level Python API specifically designed for programmatically creating GitHub Pull Requests. Combining them allows you to build powerful workflows that analyze code, generate changes, and automatically propose those changes via PRs.
 9 | 
10 | <Aside type="note">
11 |   **Use Case**
12 |   Think of workflows like AI-powered code refactoring, automated dependency updates based on analysis, or generating documentation snippets from code and submitting them for review.
13 | </Aside>
14 | 
15 | ## The Workflow: Analyze with `kit`, Act with `Supersonic`
16 | 
17 | A typical integration pattern looks like this:
18 | 
19 | 1.  **Analyze Code with `kit`**: Use `kit.Repository` methods like `extract_symbols`, `find_symbol_usages`, or `search_semantic` to understand the codebase or identify areas for modification.
20 | 2.  **Generate Changes**: Based on the analysis (potentially involving an LLM), generate the new code content or identify necessary file modifications.
21 | 3.  **Create PR with `Supersonic`**: Use `Supersonic`'s simple API (`create_pr_from_content`, `create_pr_from_file`, etc.) to package the generated changes into a new Pull Request on GitHub.
22 | 
23 | ## Example: AI Refactoring Suggestion
24 | 
25 | Imagine an AI tool that uses `kit` to analyze a Python file, identifies a potential refactoring, generates the improved code, and then uses `Supersonic` to create a PR.
26 | 
27 | ```python
28 | import kit
29 | from supersonic import Supersonic
30 | import os
31 | 
32 | # Assume kit.Repository is initialized with a local path
33 | LOCAL_REPO_PATH = "/path/to/your/local/repo/clone"
34 | # repo_analyzer = kit.Repository(LOCAL_REPO_PATH)
35 | # Note: kit analysis methods like extract_symbols would still be used here in a real scenario.
36 | 
37 | # Assume 'ai_generate_refactoring' is your function that uses an LLM
38 | # potentially fed with context from kit (not shown here for brevity)
39 | def ai_generate_refactoring(original_code: str) -> str:
40 |     # ... your AI logic here ...
41 |     improved_code = original_code.replace("old_function", "new_function") # Simplified example
42 |     return improved_code
43 | 
44 | # --- Configuration ---
45 | GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
46 | REPO_OWNER_SLASH_NAME = "your-org/your-repo" # For Supersonic PR creation
47 | RELATIVE_FILE_PATH = "src/legacy_module.py" # Relative path within the repo
48 | FULL_FILE_PATH = os.path.join(LOCAL_REPO_PATH, RELATIVE_FILE_PATH)
49 | TARGET_BRANCH = "main" # Or dynamically determine
50 | 
51 | # --- Main Workflow ---
52 | 
53 | try:
54 |     # 1. Get original content (assuming local repo)
55 |     if not os.path.exists(FULL_FILE_PATH):
56 |         print(f"Error: File not found at {FULL_FILE_PATH}")
57 |         exit()
58 | 
59 |     with open(FULL_FILE_PATH, 'r') as f:
60 |         original_content = f.read()
61 | 
62 |     # 2. Generate Changes (using AI or other logic)
63 |     refactored_content = ai_generate_refactoring(original_content)
64 | 
65 |     if refactored_content != original_content:
66 |         # 3. Create PR with Supersonic
67 |         supersonic_client = Supersonic(GITHUB_TOKEN)
68 |         pr_title = f"AI Refactor: Improve {RELATIVE_FILE_PATH}"
69 |         pr_body = f"""
70 |         AI analysis suggests refactoring in `{RELATIVE_FILE_PATH}`.
71 | 
72 |         This PR applies the suggested changes. Please review carefully.
73 |         """
74 | 
75 |         pr_url = supersonic_client.create_pr_from_content(
76 |             repo=REPO_OWNER_SLASH_NAME,
77 |             content=refactored_content,
78 |             upstream_path=RELATIVE_FILE_PATH, # Path within the target repo
79 |             title=pr_title,
80 |             description=pr_body,
81 |             base_branch=TARGET_BRANCH,
82 |             labels=["ai-refactor", "needs-review"],
83 |             draft=True # Good practice for AI suggestions
84 |         )
85 |         print(f"Successfully created PR: {pr_url}")
86 |     else:
87 |         print("No changes generated.")
88 | 
89 | except Exception as e:
90 |     print(f"An error occurred: {e}")
91 | 
92 | ```
93 | 
94 | This example illustrates how `kit`'s analytical strengths can be combined with `Supersonic`'s action-oriented PR capabilities to build powerful code automation.
95 | 


--------------------------------------------------------------------------------
/docs/src/env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference path="../.astro/types.d.ts" />


--------------------------------------------------------------------------------
/docs/src/styles/fonts/IBMPlexSansV.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cased/kit/7b2d248e06f9105dd51ef7968c0573041c19a80b/docs/src/styles/fonts/IBMPlexSansV.ttf


--------------------------------------------------------------------------------
/docs/src/styles/fonts/iAWriterQuattroV.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cased/kit/7b2d248e06f9105dd51ef7968c0573041c19a80b/docs/src/styles/fonts/iAWriterQuattroV.ttf


--------------------------------------------------------------------------------
/docs/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "astro/tsconfigs/base"
3 | }
4 | 


--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "kit",
3 |   "lockfileVersion": 3,
4 |   "requires": true,
5 |   "packages": {}
6 | }
7 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "cased-kit"
  3 | version = "0.6.3"
  4 | description = "A modular toolkit for LLM-powered codebase understanding."
  5 | authors = [
  6 |     { name = "Cased", email = "ted@cased.com" }
  7 | ]
  8 | readme = "README.md"
  9 | requires-python = ">=3.10"
 10 | license = {text = "MIT"}
 11 | dependencies = [
 12 |     "tree-sitter-language-pack>=0.7.2",
 13 |     "pathspec>=0.11.1",
 14 |     "pytest>=8.3.5",
 15 |     "numpy>=1.25",
 16 |     "fastapi==0.110.0",     
 17 |     "uvicorn[standard]>=0.20",
 18 |     "typer>=0.9,<0.15",
 19 |     "click>=8.0,<8.2",
 20 |     "openai>=1.0.0",
 21 |     "tiktoken>=0.4.0",
 22 |     "anthropic>=0.20.0",
 23 |     "google-genai>=1.14.0",
 24 |     "python-hcl2>=7.2.0",
 25 |     "mypy",
 26 |     "ruff",
 27 |     "mcp>=1.8.0,<2.0.0",
 28 |     "redis>=5.0.0",
 29 |     "requests>=2.25.0",
 30 |     "pyyaml>=6.0",
 31 |     "types-PyYAML>=6.0.12.20250516",  # Type stubs for yaml
 32 |     "types-requests>=2.32.0.20250515",  # Type stubs for requests
 33 | ]
 34 | 
 35 | [project.urls]
 36 | Homepage = "https://github.com/cased/kit"
 37 | 
 38 | [project.scripts]
 39 | kit = "kit.cli:app"
 40 | kit-mcp = "kit.mcp:main"
 41 | 
 42 | [tool.setuptools]
 43 | package-dir = {"" = "src"}
 44 | 
 45 | [tool.setuptools.packages.find]
 46 | where = ["src"]
 47 | 
 48 | [tool.setuptools.package-data]
 49 | "kit.queries" = ["*/*.scm"]
 50 | "kit" = ["queries/*/*/*.scm"]
 51 | 
 52 | [build-system]
 53 | requires = ["setuptools>=61.0"]
 54 | build-backend = "setuptools.build_meta"
 55 | 
 56 | [tool.pytest.ini_options]
 57 | minversion = "6.0"
 58 | addopts = "-ra -q"
 59 | testpaths = [
 60 |     "tests"
 61 | ]
 62 | python_files = "test_*.py"
 63 | python_classes = "Test*"
 64 | python_functions = "test_*"
 65 | markers = [
 66 |     "asyncio: mark test as asyncio to run with pytest-asyncio",
 67 |     "integration: marks tests as integration tests (may be slower)",
 68 |     "llm: marks tests that call LLM APIs (expensive, requires API keys)",
 69 |     "expensive: marks tests that are expensive/slow to run",
 70 |     "performance: marks tests that measure performance characteristics",
 71 |     "ci_skip: marks tests that should be skipped in CI environments",
 72 | ]
 73 | 
 74 | [tool.mypy]
 75 | ignore_missing_imports = true
 76 | 
 77 | [project.optional-dependencies]
 78 | dev = [
 79 |     "build", # build wheels
 80 |     "twine",  # publish to PyPI
 81 | ]
 82 | test-api = [
 83 |     "fastapi", # For TestClient
 84 |     "pytest" # Already in core, but good to list for a test group
 85 | ]
 86 | ml = [
 87 |     "sentence-transformers>=2.2.0",  # For VectorSearcher and DocstringIndexer
 88 |     "chromadb>=0.5.23",  # Vector database for semantic search
 89 | ]
 90 | all = [
 91 |     "sentence-transformers>=2.2.0",
 92 |     "chromadb>=0.5.23",
 93 | ]
 94 | 
 95 | [tool.ruff]
 96 | # Set line length to 120 characters
 97 | line-length = 120
 98 | # Target Python 3.10 as specified in our requires-python
 99 | target-version = "py310"
100 | 
101 | # Configure linting
102 | [tool.ruff.lint]
103 | # Select these rule sets (categories)
104 | select = ["E", "F", "W", "I", "RUF"]
105 | ignore = []
106 | 
107 | # Configure isort rules
108 | [tool.ruff.lint.isort]
109 | known-first-party = ["kit", "cased_kit"]
110 | 
111 | # Configure formatter
112 | [tool.ruff.format]
113 | # Formatting uses line-length from the top level
114 | 


--------------------------------------------------------------------------------
/scripts/benchmark.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | from kit.repository import Repository as Repo
 4 | 
 5 | 
 6 | def main():
 7 |     import argparse
 8 | 
 9 |     parser = argparse.ArgumentParser(description="Benchmark kit repo indexing.")
10 |     parser.add_argument("repo", nargs="?", default=".", help="Path to repo root (default: .)")
11 |     args = parser.parse_args()
12 |     repo = Repo(args.repo)
13 | 
14 |     print(f"Indexing repo at {args.repo} ...")
15 |     start = time.time()
16 |     idx = repo.index()
17 |     elapsed = time.time() - start
18 |     num_files = len(idx["file_tree"])
19 |     num_symbols = sum(len(syms) for syms in idx["symbols"].values())
20 |     print(f"Indexed {num_files} files, {num_symbols} symbols in {elapsed:.2f} seconds.")
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     main()
25 | 


--------------------------------------------------------------------------------
/scripts/format.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # This script runs linting and formatting checks using Ruff.
 3 | # Pass --fix as the first argument to automatically apply fixes.
 4 | 
 5 | # Exit immediately if a command exits with a non-zero status.
 6 | set -e
 7 | 
 8 | # Navigate to the root of the repository relative to the script directory
 9 | cd "$(dirname "$0")/.."
10 | 
11 | # Check the first argument
12 | if [ "$1" == "--fix" ]; then
13 |   echo "Running Ruff to apply fixes (linting and formatting)..."
14 |   # Apply lint rule fixes (autofixable ones)
15 |   ruff check . --fix
16 |   # Apply formatting fixes
17 |   ruff format .
18 |   echo "Ruff fixes applied successfully!"
19 | else
20 |   echo "Running Ruff linter and formatting check (no fixes applied)..."
21 |   # Ruff check combines linting and format checking
22 |   ruff check .
23 |   echo "Ruff checks passed successfully!"
24 | fi 


--------------------------------------------------------------------------------
/scripts/index.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | CLI: Index a repo and print the file tree and symbols as JSON
 4 | Usage:
 5 |     python scripts/index.py /path/to/repo
 6 | """
 7 | 
 8 | import json
 9 | import sys
10 | 
11 | from kit import Repository
12 | 
13 | if __name__ == "__main__":
14 |     repo_path = sys.argv[1] if len(sys.argv) > 1 else "."
15 |     repo = Repository(repo_path)
16 |     index = repo.index()
17 |     print(json.dumps(index, indent=2))
18 | 


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ensure all deps (including vector search) are installed, then run tests
3 | 
4 | export PYTHONPATH=src
5 | python -m pytest "$@"
6 | 


--------------------------------------------------------------------------------
/scripts/typecheck.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run mypy type checks for all source and test code
3 | export PYTHONPATH=src
4 | mypy src/kit
5 | mypy tests
6 | 


--------------------------------------------------------------------------------
/src/kit/api/__init__.py:
--------------------------------------------------------------------------------
1 | """kit REST API package."""
2 | 
3 | from .app import app  # re-export for `uvicorn kit.api:app`
4 | 


--------------------------------------------------------------------------------
/src/kit/dependency_analyzer/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from .dependency_analyzer import DependencyAnalyzer
4 | 
5 | __all__ = ["DependencyAnalyzer"]
6 | 


--------------------------------------------------------------------------------
/src/kit/llm_context.py:
--------------------------------------------------------------------------------
 1 | """Utilities to assemble rich prompts for LLMs.
 2 | 
 3 | This is intentionally lightweight – it glues together repository data
 4 | (diff, file bodies, search hits, etc.) into a single string that can be
 5 | fed straight into a chat completion.
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from pathlib import Path
11 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence
12 | 
13 | if TYPE_CHECKING:
14 |     from .repository import Repository
15 | 
16 | 
17 | class ContextAssembler:
18 |     """Collects pieces of context and spits out a prompt blob.
19 | 
20 |     Parameters
21 |     ----------
22 |     repo
23 |         A :class:`kit.repository.Repository` object representing the codebase
24 |         we want to reason about. The assembler uses it to fetch file content
25 |         and (in the future) symbol relationships.
26 |     title
27 |         Optional global title prepended to the context (not used by default).
28 |     """
29 | 
30 |     def __init__(self, repo: Repository, *, title: Optional[str] = None) -> None:
31 |         self.repo = repo
32 |         self._sections: List[str] = []
33 |         if title:
34 |             self._sections.append(f"# {title}\n")
35 | 
36 |     def add_diff(self, diff: str) -> None:
37 |         """Add a raw git diff section."""
38 |         if not diff.strip():
39 |             return
40 |         self._sections.append("## Diff\n```diff\n" + diff.strip() + "\n```")
41 | 
42 |     def add_file(
43 |         self,
44 |         file_path: str,
45 |         *,
46 |         highlight_changes: bool = False,
47 |         max_lines: int | None = None,
48 |         max_bytes: int | None = None,
49 |         skip_if_name_in: Optional[Sequence[str]] = None,
50 |     ) -> None:
51 |         """Embed full file content.
52 | 
53 |         If *highlight_changes* is true we still just inline raw content –
54 |         markup is left to the caller/LLM.
55 |         """
56 |         # Guard: skip by exact filename
57 |         if skip_if_name_in and Path(file_path).name in skip_if_name_in:
58 |             return
59 | 
60 |         try:
61 |             code = self.repo.get_file_content(file_path)
62 |         except FileNotFoundError:
63 |             return
64 | 
65 |         # Guards: size limits
66 |         if max_bytes is not None and len(code.encode("utf-8", "ignore")) > max_bytes:
67 |             return
68 |         if max_lines is not None and code.count("\n") + 1 > max_lines:
69 |             return
70 | 
71 |         lang = Path(file_path).suffix.lstrip(".") or "text"
72 |         header = f"## {file_path} (full)" if not highlight_changes else f"## {file_path} (with changes highlighted)"
73 |         self._sections.append(f"{header}\n```{lang}\n{code}\n```")
74 | 
75 |     def add_search_results(self, results: Sequence[Dict[str, Any]], *, query: str) -> None:
76 |         """Append semantic search matches to the context."""
77 |         if not results:
78 |             return
79 |         blob = [f"## Semantic search for: {query}"]
80 |         for i, res in enumerate(results, 1):
81 |             code = res.get("code") or res.get("snippet") or ""
82 |             file = res.get("file", f"result_{i}")
83 |             blob.append(f"### {file}\n```\n{code}\n```")
84 |         self._sections.append("\n".join(blob))
85 | 
86 |     def format_context(self) -> str:
87 |         """Return the accumulated context."""
88 |         return "\n\n".join(self._sections)
89 | 


--------------------------------------------------------------------------------
/src/kit/mcp/__init__.py:
--------------------------------------------------------------------------------
1 | """kit.mcp – Model Context Protocol server wrapper."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from .main import main as main
6 | from .server import serve as serve
7 | 
8 | __all__ = ["main", "serve"]
9 | 


--------------------------------------------------------------------------------
/src/kit/mcp/__main__.py:
--------------------------------------------------------------------------------
1 | """Entry point for the MCP module."""
2 | 
3 | from __future__ import annotations
4 | 
5 | from .main import main
6 | 
7 | if __name__ == "__main__":
8 |     main()
9 | 


--------------------------------------------------------------------------------
/src/kit/mcp/main.py:
--------------------------------------------------------------------------------
 1 | """Console-script entry point for the Kit MCP server."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import asyncio
 6 | import logging
 7 | import sys
 8 | 
 9 | from .server import serve
10 | 
11 | 
12 | def main() -> None:
13 |     """Launch the Kit MCP server."""
14 |     try:
15 |         asyncio.run(serve())
16 |     except KeyboardInterrupt:
17 |         logging.info("Server stopped by user")
18 |     except Exception as e:  # pragma: no cover
19 |         logging.error(f"Server error: {e!s}", exc_info=True)
20 |         sys.exit(1)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     main()
25 | 


--------------------------------------------------------------------------------
/src/kit/pr_review/ROADMAP.md:
--------------------------------------------------------------------------------
 1 | # Kit Roadmap
 2 | 
 3 | This roadmap outlines planned features and improvements for kit, prioritized by user feedback and strategic value.
 4 | 
 5 | ## 📅 Planned Features
 6 | 
 7 | #### Per-User/Per-Organization Custom Context
 8 | Store custom guidelines, coding standards, and preferences that get automatically included in reviews.
 9 | 
10 | ```bash
11 | # Example usage
12 | kit profile create --name "company-standards" --file coding-guidelines.md
13 | kit review --profile company-standards <pr-url>
14 | ```
15 | 
16 | #### Feedback Learning System
17 | Simple database to store review feedback and adapt over time.
18 | 
19 | ```bash
20 | # Example feedback workflow
21 | kit review <pr-url>  # Generates review
22 | kit feedback <review-id> --helpful/--not-helpful --notes "Missed performance issue"
23 | kit insights  # Show what's working well
24 | ```
25 | 
26 | #### Inline Comments & GitHub Review API
27 | Post comments directly on specific lines instead of single review comment.
28 | 
29 | ```bash
30 | kit review <pr-url> --mode inline  # Line-by-line comments
31 | ```
32 | 
33 | ### 🎯 Medium Term (Q3-Q4 2025)
34 | 
35 | #### Multi-Model Consensus
36 | Route different aspects to different models and aggregate insights.
37 | 
38 | ```bash
39 | kit review <pr-url> --consensus  # Use multiple models, combine results
40 | ```
41 | 
42 | #### Repository Context Learning
43 | Learn which types of context are most valuable and adapt automatically.
44 | 
45 | #### IDE Integration
46 | Real-time suggestions in VS Code and other editors while coding.
47 | 
48 | ---
49 | 
50 | ## 🔧 Technical Improvements
51 | 
52 | - **Model Router**: Intelligent routing to optimal models based on PR complexity
53 | - **Context Optimization**: Smarter context selection to maximize LLM effectiveness
54 | - **Plugin System**: Simple plugin architecture for custom analyzers
55 | 
56 | ---
57 | 
58 | ## 🎯 Success Metrics
59 | 
60 | ### User Experience
61 | - **Review Relevance**: >80% of suggestions rated as helpful
62 | - **Response Time**: <30 seconds for standard reviews
63 | - **Cost Efficiency**: <$0.10 per review for typical usage
64 | - **Adoption Rate**: >90% of PRs reviewed within 1 hour
65 | 
66 | ### Technical Quality
67 | - **Uptime**: >99.9% availability for cloud service
68 | - **Accuracy**: <5% false positive rate on issue detection
69 | - **Performance**: Support for repositories up to 1M lines of code
70 | - **Scalability**: Handle 10,000+ reviews per day per organization
71 | 
72 | ### Business Impact
73 | - **Code Quality**: Measurable improvement in code quality metrics
74 | - **Development Velocity**: Faster PR review cycles
75 | - **Bug Reduction**: Fewer bugs in production
76 | - **Developer Satisfaction**: High satisfaction scores from development teams
77 | 
78 | ---
79 | 
80 | ## 📞 Get Involved
81 | 
82 | - **Feature Requests**: [Open an issue](https://github.com/cased/kit/issues) with your ideas
83 | - **User Feedback**: Join our [Discord community](https://discord.gg/fbAVtCeU) soon for discussions
84 | - **Contributions**: Submit PRs for features you'd like to see
85 | 
86 | ---
87 | 


--------------------------------------------------------------------------------
/src/kit/pr_review/__init__.py:
--------------------------------------------------------------------------------
1 | """PR Review functionality for kit."""
2 | 
3 | from .cache import RepoCache
4 | from .config import ReviewConfig
5 | from .reviewer import PRReviewer
6 | 
7 | __all__ = ["PRReviewer", "RepoCache", "ReviewConfig"]
8 | 


--------------------------------------------------------------------------------
/src/kit/pr_review/__main__.py:
--------------------------------------------------------------------------------
1 | """Entry point for running the PR review debug CLI as a module."""
2 | 
3 | from .debug import app
4 | 
5 | if __name__ == "__main__":
6 |     app()
7 | 


--------------------------------------------------------------------------------
/src/kit/pr_review/debug.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Debug CLI for PR review testing."""
 3 | 
 4 | from typing import Optional
 5 | 
 6 | import typer
 7 | 
 8 | app = typer.Typer(help="Debug tools for PR review testing.")
 9 | 
10 | 
11 | @app.command("review")
12 | def review_pr(
13 |     pr_url: str,
14 |     config: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file"),
15 |     dry_run: bool = typer.Option(True, "--dry-run/--post", help="Run analysis but do not post comment"),
16 | ):
17 |     """Review a GitHub PR using kit analysis for testing."""
18 |     try:
19 |         from .config import ReviewConfig
20 |         from .reviewer import PRReviewer
21 | 
22 |         # Load configuration
23 |         if config:
24 |             review_config = ReviewConfig.from_file(config)
25 |         else:
26 |             review_config = ReviewConfig.from_file()
27 | 
28 |         # Override post_as_comment if dry run
29 |         if dry_run:
30 |             review_config.post_as_comment = False
31 | 
32 |         # Run review
33 |         reviewer = PRReviewer(review_config)
34 |         result = reviewer.review_pr(pr_url)
35 | 
36 |         if dry_run:
37 |             typer.echo("\n" + "=" * 50)
38 |             typer.echo("REVIEW RESULT (DRY RUN)")
39 |             typer.echo("=" * 50)
40 |             typer.echo(result)
41 |         else:
42 |             typer.echo("✅ Review posted to PR")
43 | 
44 |     except Exception as e:
45 |         typer.echo(f"❌ Error during review: {e}", err=True)
46 |         raise typer.Exit(1)
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     app()
51 | 


--------------------------------------------------------------------------------
/src/kit/pr_review/example_reviews/biopython_204_documentation_fix.md:
--------------------------------------------------------------------------------
 1 | # Example Review: Biopython Documentation Fix PR #204
 2 | 
 3 | **Repository**: [biopython/biopython.github.io](https://github.com/biopython/biopython.github.io)  
 4 | **PR**: [#204 - Add correct import statement to FAQ](https://github.com/biopython/biopython.github.io/pull/204)  
 5 | **Status**: Merged ✅  
 6 | **Changed Files**: 1  
 7 | **Review Model**: Claude Sonnet 4  
 8 | **Cost**: $0.0064  
 9 | 
10 | ## PR Summary
11 | 
12 | This is a simple documentation fix that adds a correct import statement to the Biopython Structural Bioinformatics FAQ. The author experienced an error when following the documentation and fixed it by adding the proper import statement that was missing.
13 | 
14 | **Problem**: `MMCIF2Dict("5SWE.Cif")` was failing with "TypeError: 'module' object is not callable" because the import statement only imported the module, not the class.
15 | 
16 | **Solution**: Added the correct import statement to make the code examples work properly.
17 | 
18 | ---
19 | 
20 | ## Kit AI Review Output
21 | 
22 | ### Priority Issues
23 | No significant issues identified. This is a straightforward documentation improvement.
24 | 
25 | ### Summary
26 | This PR adds a correct import statement to the Biopython Structural Bioinformatics FAQ documentation. The change appears to be a minor documentation fix, adding 2 lines to clarify proper import usage for users following the FAQ guide.
27 | 
28 | Based on the file analysis showing:
29 | - Single markdown file modified (`wiki/The_Biopython_Structural_Bioinformatics_FAQ.md`)
30 | - 2 lines added, 0 lines removed
31 | - No symbols or code dependencies affected
32 | 
33 | This is a low-risk documentation enhancement that should improve user experience by providing clearer import guidance.
34 | 
35 | ### Recommendations
36 | 
37 | **Documentation Quality:**
38 | - Since the actual diff content isn't accessible due to the 404 error, I recommend verifying that:
39 |   - The import statement follows current Biopython conventions
40 |   - The syntax is correct and tested
41 |   - The import aligns with the specific FAQ context where it's being added
42 | 
43 | **Process Improvement:**
44 | - Consider adding a brief comment in the PR description explaining which specific import was corrected and why, to help future maintainers understand the context
45 | 
46 | **Validation:**
47 | - Ensure the import statement works with the current version of Biopython being documented
48 | - Verify the import is placed in the appropriate section of the FAQ for logical flow
49 | 
50 | This appears to be a beneficial change that enhances the documentation's accuracy and user-friendliness with minimal risk.
51 | 
52 | ---
53 | 
54 | ## Review Quality Metrics
55 | 
56 | - **File References**: 1
57 | - **Line References**: 0  
58 | - **Specific Issues**: 1
59 | - **Vague Statements**: 0
60 | - **GitHub Links**: 0
61 | - **Code Relevance**: 0.8
62 | - **Change Coverage**: 1.0
63 | - **Overall Score**: 1.00/1.0
64 | 
65 | ## Key Insights
66 | 
67 | 1. **Proportional Response**: Kit provided an appropriately scaled review for a small documentation change
68 | 2. **Risk Assessment**: Correctly identified this as a low-risk documentation enhancement
69 | 3. **Context Understanding**: Recognized the fix was about import statement corrections
70 | 4. **Practical Recommendations**: Focused on documentation quality and validation rather than complex code issues
71 | 5. **Cost Efficiency**: Minimal cost ($0.0064) for appropriate level of analysis
72 | 
73 | This review demonstrates Kit's ability to scale its analysis appropriately for different types of changes, providing useful feedback without over-engineering the response for simple fixes. 


--------------------------------------------------------------------------------
/src/kit/pr_review/example_reviews/fastapi_11935_standard_dependencies.md:
--------------------------------------------------------------------------------
 1 | # Example Review: FastAPI PR #11935
 2 | 
 3 | **Repository**: [tiangolo/fastapi](https://github.com/tiangolo/fastapi)  
 4 | **PR**: [#11935 - Add support for `pip install "fastapi[standard]"` with standard dependencies](https://github.com/tiangolo/fastapi/pull/11935)  
 5 | **Status**: Merged ✅  
 6 | **Changed Files**: 15  
 7 | **Review Model**: Claude Sonnet 4  
 8 | **Cost**: $0.0340  
 9 | 
10 | ## PR Summary
11 | 
12 | This PR introduces a significant packaging change for FastAPI, moving from a "batteries-included" default installation to an opt-in approach for standard dependencies. The change allows users to install either:
13 | - `pip install fastapi` (minimal installation)
14 | - `pip install "fastapi[standard]"` (full experience with CLI and development tools)
15 | 
16 | ---
17 | 
18 | ## Kit AI Review Output
19 | 
20 | ### Priority Issues
21 | 
22 | **Medium Priority**
23 | - [fastapi/cli.py:12](https://github.com/fastapi/fastapi/blob/main/fastapi/cli.py#L12): The error handling raises a RuntimeError after printing the message, which could be confusing in programmatic usage. Consider using a more specific exception type like `ImportError` or `ModuleNotFoundError`.
24 | 
25 | **Low Priority**
26 | - [tests/test_fastapi_cli.py:14](https://github.com/fastapi/fastapi/blob/main/tests/test_fastapi_cli.py#L14): The test uses coverage run which adds complexity. Consider testing the CLI functionality more directly or documenting why coverage is needed here.
27 | 
28 | ### Summary
29 | 
30 | This PR introduces a significant packaging change for FastAPI, moving from a "batteries-included" default installation to an opt-in approach for standard dependencies. Key changes include:
31 | 
32 | - **New installation pattern**: Users now install `pip install "fastapi[standard]"` for the full experience, while `pip install fastapi` provides a minimal installation
33 | - **CLI integration**: Adds `python -m fastapi` support through new `__main__.py` and `cli.py` modules
34 | - **Graceful degradation**: When CLI dependencies aren't available, provides clear error messages with installation instructions
35 | - **Documentation updates**: Comprehensive updates across README and docs to reflect the new installation approach
36 | 
37 | The architectural change simplifies the core FastAPI package while maintaining backward compatibility through optional dependencies.
38 | 
39 | ### Recommendations
40 | 
41 | **Error Handling Enhancement**
42 | ```python
43 | # In fastapi/cli.py, consider using a more specific exception:
44 | def main() -> None:
45 |     if not cli_main:
46 |         message = 'To use the fastapi command, please install "fastapi[standard]":\n\n\tpip install "fastapi[standard]"\n'
47 |         print(message)
48 |         raise ImportError(message) from None  # More semantically correct
49 | ```
50 | 
51 | **Test Coverage Improvement**
52 | - Consider adding a test that verifies the CLI works when `fastapi-cli` is properly installed
53 | - The current test only covers the error case and uses subprocess with coverage, which may be brittle
54 | 
55 | **Documentation Consistency**
56 | - Verify that all code examples in tutorials and documentation are updated to use the new installation pattern
57 | - Consider adding a migration guide for existing users
58 | 
59 | **Cross-codebase Impact**
60 | - This change will affect all downstream projects and tutorials that reference FastAPI installation
61 | - Consider coordinating with the ecosystem (tutorials, courses, etc.) for a smooth transition
62 | - The change is well-documented but represents a significant shift in user experience
63 | 
64 | The implementation is solid and the graceful error handling for missing CLI dependencies is well-designed. The packaging simplification aligns with modern Python practices of minimal default installations with optional extras.
65 | 
66 | ---
67 | 
68 | ## Review Quality Metrics
69 | 
70 | - **File References**: 3
71 | - **Line References**: 4  
72 | - **Specific Issues**: 7
73 | - **Vague Statements**: 0
74 | - **GitHub Links**: 2
75 | - **Code Relevance**: 1.0
76 | - **Change Coverage**: 1.0
77 | - **Overall Score**: 1.00/1.0
78 | 
79 | ## Key Insights
80 | 
81 | 1. **Comprehensive Analysis**: Kit identified both the immediate code changes and broader ecosystem impact
82 | 2. **Specific Recommendations**: Provided concrete code suggestions with rationale
83 | 3. **Quality Focus**: Flagged potential issues in error handling and test design
84 | 4. **Context Awareness**: Understood this was a breaking change requiring ecosystem coordination
85 | 
86 | This review demonstrates Kit's ability to understand architectural changes and their broader implications beyond just code syntax. 


--------------------------------------------------------------------------------
/src/kit/pr_review/example_reviews/react_dev_6986_branding_menu.md:
--------------------------------------------------------------------------------
 1 | # Example Review: React.dev PR #6986
 2 | 
 3 | **Repository**: [reactjs/react.dev](https://github.com/reactjs/react.dev)  
 4 | **PR**: [#6986 - Add branding context menu](https://github.com/reactjs/react.dev/pull/6986)  
 5 | **Status**: Merged ✅  
 6 | **Changed Files**: 8  
 7 | **Review Model**: Claude Sonnet 4  
 8 | **Cost**: $0.0118  
 9 | 
10 | ## PR Summary
11 | 
12 | This PR adds a context menu to the React logo in the top navigation that provides access to React branding materials. When users right-click on the logo, they can access brand assets including logos and wordmarks in both light and dark variants.
13 | 
14 | ---
15 | 
16 | ## Kit AI Review Output
17 | 
18 | ### Priority Issues
19 | 
20 | **Medium Priority:**
21 | - [src/components/Layout/TopNav/BrandMenu.tsx:1](https://github.com/rickhanlonii/react.dev/blob/main/src/components/Layout/TopNav/BrandMenu.tsx#L1) - Missing accessibility attributes for the dropdown menu. The menu button should have `aria-expanded`, `aria-haspopup`, and proper ARIA labeling for screen readers.
22 | 
23 | - [src/components/Layout/TopNav/BrandMenu.tsx:1](https://github.com/rickhanlonii/react.dev/blob/main/src/components/Layout/TopNav/BrandMenu.tsx#L1) - No keyboard navigation support. Users should be able to navigate menu items with arrow keys and close with Escape key.
24 | 
25 | **Low Priority:**
26 | - [src/components/Layout/TopNav/TopNav.tsx:1](https://github.com/reactjs/react.dev/blob/main/src/components/Layout/TopNav/TopNav.tsx#L1) - The modification to existing TopNav component may affect existing functionality. Ensure backward compatibility is maintained.
27 | 
28 | ### Summary
29 | 
30 | This PR adds a branding context menu to the top navigation, introducing:
31 | - A new `BrandMenu` component with dropdown functionality
32 | - Brand assets (logos and wordmarks) in both light and dark variants
33 | - Integration with the existing `TopNav` component
34 | - Addition of a new dependency in package.json
35 | 
36 | The implementation appears to be a self-contained feature addition that provides users with access to React branding materials through a dropdown menu in the navigation bar.
37 | 
38 | ### Recommendations
39 | 
40 | **Accessibility Improvements:**
41 | ```tsx
42 | // Add proper ARIA attributes to the menu button
43 | <button
44 |   aria-expanded={isOpen}
45 |   aria-haspopup="menu"
46 |   aria-label="React brand assets menu"
47 |   onClick={toggleMenu}
48 | >
49 | ```
50 | 
51 | **Keyboard Navigation:**
52 | - Implement `onKeyDown` handlers for arrow key navigation between menu items
53 | - Add Escape key handler to close the menu
54 | - Ensure proper focus management when menu opens/closes
55 | 
56 | **Error Handling:**
57 | - Add error boundaries around the new component to prevent crashes if brand assets fail to load
58 | - Consider lazy loading for the brand assets to improve initial page load performance
59 | 
60 | **Testing Considerations:**
61 | - Verify the menu works correctly on mobile devices
62 | - Test with screen readers to ensure accessibility compliance
63 | - Confirm the menu doesn't interfere with existing navigation functionality
64 | 
65 | **Security Note:**
66 | The SVG files should be validated to ensure they don't contain any malicious scripts, though they appear to be standard brand assets.
67 | 
68 | The overall implementation looks solid for a branding feature, but accessibility and keyboard navigation improvements would make it more inclusive and user-friendly.
69 | 
70 | ---
71 | 
72 | ## Review Quality Metrics
73 | 
74 | - **File References**: 3
75 | - **Line References**: 6  
76 | - **Specific Issues**: 5
77 | - **Vague Statements**: 1
78 | - **GitHub Links**: 3
79 | - **Code Relevance**: 0.24 (flagged as potentially off-topic)
80 | 
81 | ## Key Insights
82 | 
83 | 1. **Accessibility Focus**: Kit automatically identified missing accessibility features in UI components
84 | 2. **User Experience**: Flagged keyboard navigation gaps that impact usability
85 | 3. **Integration Awareness**: Noted potential impact on existing navigation functionality
86 | 4. **Security Consciousness**: Mentioned SVG validation as a security consideration
87 | 5. **Performance Suggestions**: Recommended lazy loading for better initial page load
88 | 
89 | This review demonstrates Kit's ability to analyze frontend code with attention to accessibility, user experience, and modern web development best practices. 


--------------------------------------------------------------------------------
/src/kit/pr_review/line_ref_fixer.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import re
 4 | from typing import Dict, List, Tuple
 5 | 
 6 | from .diff_parser import DiffParser
 7 | 
 8 | 
 9 | class LineRefFixer:
10 |     """Utility to validate and auto-fix file:line references in an AI review comment."""
11 | 
12 |     # Match file references like path/to/file.ext:123 or file.ext:10-20
13 |     # Extension 1–10 alphanum chars to avoid over-matching URLs.
14 |     REF_PATTERN = re.compile(r"([\w./+-]+\.[a-zA-Z0-9]{1,10}):(\d+)(?:-(\d+))?")
15 | 
16 |     @classmethod
17 |     def _build_valid_line_map(cls, diff_text: str) -> Dict[str, set[int]]:
18 |         diff_files = DiffParser.parse_diff(diff_text)
19 |         valid: Dict[str, set[int]] = {}
20 |         for filename, fd in diff_files.items():
21 |             line_set: set[int] = set()
22 |             for hunk in fd.hunks:
23 |                 cur = hunk.new_start
24 |                 for raw in hunk.lines:
25 |                     # Any line that exists in the *new* file (context or addition) is legal.
26 |                     if not raw.startswith("-"):
27 |                         line_set.add(cur)
28 |                         cur += 1
29 |             valid[filename] = line_set
30 |         return valid
31 | 
32 |     @classmethod
33 |     def fix_comment(cls, comment: str, diff_text: str) -> Tuple[str, List[Tuple[str, int, int]]]:
34 |         """Return (fixed_comment, fixes).
35 | 
36 |         fixes list items are (filename, old_line, new_line).
37 |         """
38 |         valid_map = cls._build_valid_line_map(diff_text)
39 |         fixes: List[Tuple[str, int, int]] = []
40 | 
41 |         def _nearest(file: str, line: int) -> int:
42 |             lines = valid_map.get(file, set())
43 |             return min(lines, key=lambda n: abs(n - line)) if lines else line
44 | 
45 |         def _replacer(match: re.Match[str]) -> str:
46 |             file, start_s, end_s = match.groups()
47 |             start = int(start_s)
48 |             if end_s:
49 |                 end = int(end_s)
50 |                 new_start = _nearest(file, start)
51 |                 new_end = _nearest(file, end)
52 |                 if (new_start, new_end) != (start, end):
53 |                     fixes.append((file, start, new_start))
54 |                     fixes.append((file, end, new_end))
55 |                 return f"{file}:{new_start}-{new_end}"
56 |             else:
57 |                 new_line = _nearest(file, start)
58 |                 if new_line != start:
59 |                     fixes.append((file, start, new_line))
60 |                 return f"{file}:{new_line}"
61 | 
62 |         fixed_comment = cls.REF_PATTERN.sub(_replacer, comment)
63 |         return fixed_comment, fixes
64 | 


--------------------------------------------------------------------------------
/src/kit/queries/c/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; C symbol queries (tree-sitter-c)
 2 | 
 3 | ;; Functions
 4 | (function_definition
 5 |   declarator: (function_declarator
 6 |                 declarator: (identifier) @name)) @definition.function
 7 | 
 8 | ;; Structs
 9 | (struct_specifier
10 |   name: (type_identifier) @name) @definition.struct
11 | 
12 | ;; Enums
13 | (enum_specifier
14 |   name: (type_identifier) @name) @definition.enum
15 | 


--------------------------------------------------------------------------------
/src/kit/queries/go/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; tags.scm for Go symbol extraction (tree-sitter-go)
 2 | 
 3 | ; Function Declarations (including main)
 4 | (function_declaration
 5 |   name: (identifier) @name) @definition.function
 6 | 
 7 | ; Method Declarations (functions with receivers)
 8 | (method_declaration
 9 |     name: (field_identifier) @name) @definition.method
10 | 
11 | ; Struct Type Declarations
12 | (type_declaration
13 |     (type_spec
14 |         name: (type_identifier) @name
15 |         type: (struct_type)) @definition.struct)
16 | 
17 | ; Interface Type Declarations
18 | (type_declaration
19 |     (type_spec
20 |         name: (type_identifier) @name
21 |         type: (interface_type)) @definition.interface)
22 | 
23 | ; Package-level variable declarations
24 | (var_declaration
25 |     (var_spec
26 |         name: (identifier) @name)) @definition.variable
27 | 
28 | ; Package-level constant declarations
29 | (const_declaration
30 |     (const_spec
31 |         name: (identifier) @name)) @definition.constant
32 | 


--------------------------------------------------------------------------------
/src/kit/queries/hcl/tags.scm:
--------------------------------------------------------------------------------
 1 | ; Capture resource blocks (resource "aws_instance" "example")
 2 | (block
 3 |   (identifier) @block_keyword
 4 |   (string_lit) @type
 5 |   (string_lit) @name
 6 |   (#eq? @block_keyword "resource")
 7 | ) @definition.resource
 8 | 
 9 | ; Capture variables (variable "my_var")
10 | (block
11 |   (identifier) @type
12 |   (string_lit) @name
13 |   (#eq? @type "variable")
14 | ) @definition.variable
15 | 
16 | ; Capture outputs (output "my_output")
17 | (block
18 |   (identifier) @type
19 |   (string_lit) @name
20 |   (#eq? @type "output")
21 | ) @definition.output
22 | 
23 | ; Capture modules (module "my_module")
24 | (block
25 |   (identifier) @type
26 |   (string_lit) @name
27 |   (#eq? @type "module")
28 | ) @definition.module
29 | 
30 | ; Capture providers (provider "aws")
31 | (block
32 |   (identifier) @type
33 |   (string_lit) @name
34 |   (#eq? @type "provider")
35 | ) @definition.provider
36 | 
37 | ; Capture data sources (data "aws_ami" "example")
38 | (block
39 |   (identifier) @block_type
40 |   (string_lit) @type
41 |   (string_lit) @name
42 |   (#eq? @block_type "data")
43 | ) @definition.data
44 | 
45 | ; Locals and generic identifier-only blocks
46 | (block
47 |   (identifier) @type
48 |   (#eq? @type "locals")
49 | ) @definition.locals
50 | (block
51 |   (identifier) @type
52 |   (#eq? @type "terraform")
53 | ) @definition.terraform
54 | 


--------------------------------------------------------------------------------
/src/kit/queries/java/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; Java symbol queries (tree-sitter-java)
 2 | 
 3 | ;; Classes
 4 | (class_declaration
 5 |   name: (identifier) @name) @definition.class
 6 | 
 7 | ;; Interfaces
 8 | (interface_declaration
 9 |   name: (identifier) @name) @definition.interface
10 | 
11 | ;; Enums
12 | (enum_declaration
13 |   name: (identifier) @name) @definition.enum
14 | 
15 | ;; Methods (instance & static)
16 | (method_declaration
17 |   name: (identifier) @name) @definition.method
18 | 
19 | ;; Constructors
20 | (constructor_declaration
21 |   name: (identifier) @name) @definition.method
22 | 


--------------------------------------------------------------------------------
/src/kit/queries/javascript/tags.scm:
--------------------------------------------------------------------------------
1 | ; Example tags.scm for JavaScript (tree-sitter-javascript)
2 | ; See https://github.com/tree-sitter/tree-sitter-javascript/blob/master/queries/highlights.scm for reference
3 | 
4 | (function_declaration
5 |   name: (identifier) @function)
6 | 
7 | (class_declaration
8 |   name: (identifier) @class)
9 | 


--------------------------------------------------------------------------------
/src/kit/queries/python/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; tags.scm for Python symbol extraction
 2 | 
 3 | ; Top-level function definitions (direct child of module, potentially decorated)
 4 | (module
 5 |   (decorated_definition
 6 |     definition: (function_definition
 7 |       name: (identifier) @name) @definition.function))
 8 | (module
 9 |   (function_definition
10 |     name: (identifier) @name) @definition.function)
11 | 
12 | ; Async top-level function definitions (direct child of module, potentially decorated) - AFTER general function
13 | (module
14 |   (decorated_definition
15 |       definition: (function_definition
16 |           "async"
17 |           name: (identifier) @name) @definition.function))
18 | (module
19 |   (function_definition
20 |     "async"
21 |     name: (identifier) @name) @definition.function)
22 | 
23 | ; Class definitions
24 | (class_definition
25 |   name: (identifier) @name) @definition.class
26 | 
27 | ; Methods within classes (potentially decorated)
28 | (class_definition
29 |   body: (_ 
30 |     (decorated_definition
31 |         definition: (function_definition
32 |             name: (identifier) @name) @definition.method)))
33 | (class_definition
34 |   body: (_ 
35 |     (function_definition
36 |       name: (identifier) @name) @definition.method
37 |   ))
38 | 
39 | ; Async methods within classes (potentially decorated) - AFTER general method
40 | (class_definition
41 |   body: (_ 
42 |       (decorated_definition
43 |           definition: (function_definition
44 |               "async"
45 |               name: (identifier) @name) @definition.method)))
46 | (class_definition
47 |   body: (_ 
48 |     (function_definition
49 |       "async" ; Match the async keyword
50 |       name: (identifier) @name) @definition.method
51 |   ))
52 | 


--------------------------------------------------------------------------------
/src/kit/queries/ruby/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; Ruby symbol queries
 2 | 
 3 | ;; Classes
 4 | (class
 5 |   name: (constant) @name) @definition.class
 6 | 
 7 | ;; Modules
 8 | (module
 9 |   name: (constant) @name) @definition.module
10 | 
11 | ;; Instance methods
12 | (method
13 |   name: (identifier) @name) @definition.method
14 | 
15 | ;; Singleton methods (def self.foo)
16 | (singleton_method
17 |   name: (identifier) @name) @definition.method
18 | 


--------------------------------------------------------------------------------
/src/kit/queries/rust/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; tags.scm for Rust symbol extraction
 2 | 
 3 | (function_item
 4 |   name: (identifier) @name
 5 |   (#set! type "function"))
 6 | 
 7 | (struct_item
 8 |   name: (type_identifier) @name
 9 |   (#set! type "struct"))
10 | 
11 | (enum_item
12 |   name: (type_identifier) @name
13 |   (#set! type "enum"))
14 | 
15 | (trait_item
16 |   name: (type_identifier) @name
17 |   (#set! type "trait"))
18 | 
19 | (impl_item
20 |   type: (type_identifier) @name
21 |   (#set! type "impl"))
22 | 


--------------------------------------------------------------------------------
/src/kit/queries/typescript/tags.scm:
--------------------------------------------------------------------------------
 1 | ;; tags.scm for TypeScript symbol extraction
 2 | 
 3 | ; functions
 4 | (function_declaration
 5 |   name: (identifier) @name @definition.function)
 6 | 
 7 | ; classes (with optional modifiers like export)
 8 | (class_declaration
 9 |   name: (type_identifier) @name @definition.class)
10 | 
11 | ; interfaces
12 | (interface_declaration
13 |   name: (type_identifier) @name @definition.interface)
14 | 
15 | ; enums
16 | (enum_declaration
17 |   name: (identifier) @name @definition.enum)
18 | 
19 | ; Class methods (implementations, explicitly within class_body)
20 | (class_body
21 |   (method_definition
22 |     name: (property_identifier) @name @definition.method))
23 | 
24 | ; Static Class methods (implementations, explicitly within class_body)
25 | (class_body
26 |   (method_definition
27 |     "static"
28 |     name: (property_identifier) @name @definition.method))
29 | 
30 | ; Exported function (variable declarator with function)
31 | (export_statement
32 |   declaration: (variable_declaration
33 |     (variable_declarator
34 |       name: (identifier) @name
35 |       value: (function_expression)
36 |     ) @definition.function
37 |   ))
38 | 
39 | ; Exported function (variable declarator with arrow function)
40 | (export_statement
41 |   declaration: (variable_declaration
42 |     (variable_declarator
43 |       name: (identifier) @name
44 |       value: (arrow_function)
45 |     ) @definition.function
46 |   ))
47 | 
48 | ; Exported class
49 | (export_statement
50 |   declaration: (class_declaration
51 |     name: (type_identifier) @name @definition.class
52 |   ))
53 | 
54 | ; Exported interface
55 | (export_statement
56 |   declaration: (interface_declaration
57 |     name: (type_identifier) @name @definition.interface
58 |   ))
59 | 
60 | ; Exported enum
61 | (export_statement
62 |   declaration: (enum_declaration
63 |     name: (identifier) @name @definition.enum
64 |   ))
65 | 
66 | ; Type alias
67 | (type_alias_declaration
68 |     name: (type_identifier) @name @definition.type)
69 | 
70 | ; Exported type alias
71 | (export_statement
72 |     declaration: (type_alias_declaration
73 |         name: (type_identifier) @name @definition.type
74 |     ))
75 | 
76 | ; Namespace (try internal_module)
77 | (internal_module
78 |   name: (identifier) @name @definition.namespace)
79 | 


--------------------------------------------------------------------------------
/src/kit/tool_schemas.py:
--------------------------------------------------------------------------------
 1 | """Utility helpers for exposing kit's MCP tool schema to LLM runtimes.
 2 | 
 3 | This module lets you grab the same JSON-Schema objects that the MCP server
 4 | would advertise, without having to spin up a server.  Pass the list directly
 5 | as the `tools` / `functions` parameter to OpenAI, Anthropic, etc.
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from typing import Any, Dict, List
11 | 
12 | # Avoid importing heavy MCP dependencies at module-import time to prevent
13 | # unnecessary ImportError for users who never need the helper.  Everything is
14 | # imported lazily inside the function.
15 | 
16 | __all__ = ["get_tool_schemas"]
17 | 
18 | 
19 | def get_tool_schemas() -> List[Dict[str, Any]]:
20 |     """Return the JSON-serialisable schema for every kit tool.
21 | 
22 |     Example
23 |     -------
24 |     >>> from kit.tool_schemas import get_tool_schemas
25 |     >>> openai_client.chat.completions.create(
26 |     ...     model="gpt-4o",
27 |     ...     tools=get_tool_schemas(),
28 |     ...     messages=[...],
29 |     ... )
30 |     """
31 |     # Late imports to avoid circular dependencies *and* to keep MCP optional
32 |     try:
33 |         from mcp.types import Tool  # type: ignore
34 | 
35 |         from kit.mcp.server import KitServerLogic  # type: ignore
36 |     except ImportError as e:  # pragma: no cover – pack not installed
37 |         raise ImportError(
38 |             "`get_tool_schemas()` requires the optional `mcp` package. \n"
39 |             "Install it via `pip install mcp-spec` or `pip install cased-kit[mcp]`."
40 |         ) from e
41 | 
42 |     logic = KitServerLogic()
43 |     tools: List[Tool] = logic.list_tools()
44 |     # `model_dump` is a Pydantic method (v2) that returns plain dicts ready for JSON
45 |     return [tool.model_dump(mode="json") for tool in tools]
46 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # test.sh: Run all tests in the correct environment
 3 | set -e
 4 | 
 5 | # Ensure environment is set up
 6 | source $HOME/.venv/bin/activate 2>/dev/null || true
 7 | export KIT_TREE_SITTER_LIB="build/my-languages.so"
 8 | 
 9 | # Run tests with uv if available, else fallback to python
10 | if command -v uv &> /dev/null; then
11 |   uv run pytest -v tests
12 | else
13 |   python -m pytest -v tests
14 | fi
15 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
  1 | # Test Suite Documentation
  2 | 
  3 | This directory contains comprehensive tests for the kit project, including unit tests, integration tests, and LLM accuracy tests.
  4 | 
  5 | ## Test Categories
  6 | 
  7 | ### 🚀 Fast Tests (Default)
  8 | These run in CI and don't require external dependencies:
  9 | ```bash
 10 | # Run all fast tests (excludes LLM tests)
 11 | pytest
 12 | 
 13 | # Run specific test categories
 14 | pytest tests/test_pr_review.py           # PR review functionality 
 15 | pytest tests/test_diff_parser.py         # Diff parsing unit tests
 16 | pytest tests/test_diff_integration.py    # Diff parsing integration tests
 17 | ```
 18 | 
 19 | ### 🧪 Integration Tests
 20 | Tests that verify end-to-end functionality:
 21 | ```bash
 22 | # Run integration tests
 23 | pytest -m integration
 24 | 
 25 | # Run integration tests excluding LLM calls
 26 | pytest -m "integration and not llm"
 27 | ```
 28 | 
 29 | ### 🤖 LLM Tests (Expensive)
 30 | Tests that actually call LLM APIs.
 31 | 
 32 | **Requirements:**
 33 | - API keys: `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`
 34 | - These tests cost money and are slow
 35 | - Automatically skipped in CI environments
 36 | 
 37 | ```bash
 38 | # Run LLM tests (requires API keys)
 39 | pytest -m llm
 40 | 
 41 | # Run specific LLM test
 42 | pytest tests/test_llm_line_accuracy.py::TestLLMLineAccuracy::test_llm_with_accurate_context
 43 | ```
 44 | 
 45 | ### 💰 Expensive Tests
 46 | Large-scale tests that may be slow or costly:
 47 | ```bash
 48 | # Run expensive tests (includes LLM tests)
 49 | pytest -m expensive
 50 | ```
 51 | 
 52 | ## Test Markers
 53 | 
 54 | | Marker | Description | Auto-skipped in CI |
 55 | |--------|-------------|-------------------|
 56 | | `integration` | Integration tests | No |
 57 | | `llm` | Tests that call LLM APIs | Yes |
 58 | | `expensive` | Slow/costly tests | Yes |
 59 | 
 60 | ## Line Number Accuracy Tests
 61 | 
 62 | 
 63 | ### What These Tests Do
 64 | 1. **Before/After Comparison** - Test LLM accuracy with vs without our context
 65 | 2. **Real API Calls** - Actually call Anthropic/OpenAI APIs
 66 | 3. **Accuracy Measurement** - Validate that 70-80%+ of line references are correct
 67 | 4. **Multi-file Testing** - Test accuracy across multiple changed files
 68 | 
 69 | ### Running LLM Tests Locally
 70 | 
 71 | 1. **Set up API keys:**
 72 |    ```bash
 73 |    export ANTHROPIC_API_KEY="your-key-here"
 74 |    # OR
 75 |    export OPENAI_API_KEY="your-key-here"
 76 |    ```
 77 | 
 78 | 2. **Run tests:**
 79 |    ```bash
 80 |    # Run all LLM tests
 81 |    pytest tests/test_llm_line_accuracy.py -v
 82 |    
 83 |    # Run specific test
 84 |    pytest tests/test_llm_line_accuracy.py::TestLLMLineAccuracy::test_llm_without_context_comparison -v -s
 85 |    ```
 86 | 
 87 | 3. **Expected costs:**
 88 |    - Uses cheaper models (`claude-3-5-haiku`, `gpt-4o-mini`)
 89 |    - ~$0.01-0.05 per test run
 90 |    - Total test suite: ~$0.20-0.50
 91 | 
 92 | ## CI Behavior
 93 | 
 94 | - **GitHub Actions** and other CI environments automatically skip LLM tests
 95 | - Only fast unit and integration tests run in CI
 96 | - Developers can run LLM tests locally to verify improvements
 97 | 
 98 | ## Troubleshooting
 99 | 
100 | ### Import Errors
101 | If you see import errors when running tests:
102 | ```bash
103 | # Install in development mode
104 | pip install -e .
105 | 
106 | # Or install with test dependencies
107 | pip install -e .[test-api]
108 | ```
109 | 
110 | ### API Key Issues
111 | LLM tests will be automatically skipped if:
112 | - No API keys are found
113 | - Running in CI environment
114 | - API calls fail
115 | 
116 | ### Test Performance
117 | ```bash
118 | # Run fast tests only
119 | pytest --ignore=tests/test_llm_line_accuracy.py
120 | 
121 | # Run with minimal output
122 | pytest -q
123 | 
124 | # Run specific test file
125 | pytest tests/test_diff_parser.py -v
126 | ``` 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pathlib import Path
3 | 
4 | ROOT = Path(__file__).resolve().parents[1]
5 | if str(ROOT) not in sys.path:
6 |     sys.path.insert(0, str(ROOT))
7 | 


--------------------------------------------------------------------------------
/tests/evals/pr_test_set.txt:
--------------------------------------------------------------------------------
1 | https://github.com/cased/comet/pull/2071
2 | https://github.com/gunnarmorling/1brc/pull/21
3 | https://github.com/nodejs/node/pull/53337 


--------------------------------------------------------------------------------
/tests/examples/python_dependency_analysis/README.md:
--------------------------------------------------------------------------------
 1 | # Python Dependency Analysis Demo
 2 | 
 3 | This example demonstrates how to use the Kit SDK's Python Dependency Analyzer to analyze, visualize, and understand import relationships in Python codebases.
 4 | 
 5 | ## Overview
 6 | 
 7 | The demo includes a Python script that analyzes the Kit repository itself, showcasing how the `PythonDependencyAnalyzer` can:
 8 | 
 9 | 1. Build a comprehensive dependency graph of Python modules
10 | 2. Identify circular dependencies
11 | 3. Generate visualizations of module relationships
12 | 4. Create LLM-friendly context for understanding code structure
13 | 5. Analyze specific modules and their interdependencies
14 | 
15 | ## Key Features Demonstrated
16 | 
17 | This demo shows how Kit's Python Dependency Analyzer can:
18 | 
19 | 1. **Map Import Relationships**: Identify which modules import others, both directly and indirectly.
20 | 
21 | 2. **Detect Circular Dependencies**: Find circular imports that might cause issues at runtime.
22 | 
23 | 3. **Visualize Code Structure**: Generate visual representations of module relationships.
24 | 
25 | 4. **Generate Documentation**: Create LLM-friendly context that describes the code structure in natural language.
26 | 
27 | 5. **Analyze Specific Modules**: Dive deep into the dependencies and dependents of particular modules.
28 | 
29 | ## Running the Demo
30 | 
31 | To run the demo and analyze the Kit repository:
32 | 
33 | ```bash
34 | # Create output directory
35 | mkdir -p output
36 | 
37 | # Run basic analysis
38 | python analyze_python_deps.py
39 | 
40 | # Run with visualization
41 | python analyze_python_deps.py --visualize
42 | 
43 | # Generate LLM-friendly context
44 | python analyze_python_deps.py --llm-context
45 | 
46 | # Export in different formats
47 | python analyze_python_deps.py --format json
48 | python analyze_python_deps.py --format graphml
49 | 
50 | # Analyze a specific module
51 | python analyze_python_deps.py --module kit.dependency_analyzer.python_dependency_analyzer
52 | 
53 | # Run all options
54 | python analyze_python_deps.py --visualize --llm-context --format dot --module kit.repository
55 | ```
56 | 
57 | ## Implementation Details
58 | 
59 | The analyzer works by:
60 | 
61 | 1. **Parsing Python Files**: It examines each Python file in the codebase to extract import statements.
62 | 
63 | 2. **Mapping Module Names**: It builds a map between module identifiers and file paths.
64 | 
65 | 3. **Building a Dependency Graph**: It creates a graph where nodes are modules and edges represent import relationships.
66 | 
67 | 4. **Analyzing the Graph**: It uses graph algorithms to find cycles, central modules, and other insights.
68 | 
69 | The Kit repository is an ideal showcase for the analyzer because it has a well-structured codebase with clear module boundaries and dependencies.
70 | 
71 | ## Example Output
72 | 
73 | The analyzer produces several outputs:
74 | 
75 | 1. **Dependency Graph**: A graph representation of module relationships in various formats (.dot, .json, .graphml).
76 | 
77 | 2. **Visualization**: A visual representation of the dependency graph (.png).
78 | 
79 | 3. **LLM Context**: A markdown document describing the code structure in natural language.
80 | 
81 | 4. **Module Analysis**: Detailed information about specific modules, including their dependencies and dependents.
82 | 
83 | 5. **Console Output**: A summary of key findings, including circular dependencies and central modules.
84 | 
85 | ## Notes
86 | 
87 | - The visualization requires the Graphviz package. If you encounter errors, install it with `pip install graphviz`.
88 | - You may also need to install the Graphviz system package (`brew install graphviz` on macOS).
89 | - Large repositories may generate complex visualizations that are hard to interpret. Consider filtering to specific modules.
90 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/README.md:
--------------------------------------------------------------------------------
 1 | # Terraform Dependency Analysis Demo
 2 | 
 3 | This example demonstrates how to use the Kit SDK's Terraform Dependency Analyzer to analyze, visualize, and understand complex infrastructure relationships in Terraform configurations.
 4 | 
 5 | ## Overview
 6 | 
 7 | The demo includes:
 8 | 
 9 | 1. A complex AWS infrastructure setup with multiple interdependent resources:
10 |    - Networking (VPC, subnets, gateways, route tables)
11 |    - Computing (EC2, Auto Scaling Groups, Load Balancers)
12 |    - Storage (S3 buckets, EFS)
13 |    - Databases (RDS, ElastiCache)
14 |    - Security (Security Groups, IAM roles, KMS keys)
15 |    - DNS (Route53, ACM certificates)
16 | 
17 | 2. A Python script that uses Kit's `TerraformDependencyAnalyzer` to:
18 |    - Build a comprehensive dependency graph
19 |    - Identify circular dependencies
20 |    - Generate visualizations
21 |    - Create LLM-friendly context about the infrastructure
22 |    - Analyze key resources and their relationships
23 | 
24 | ## File Structure
25 | 
26 | - `main.tf` - Provider configuration and project structure
27 | - `variables.tf` - Input variables for the infrastructure
28 | - `network.tf` - VPC, subnets, gateways, and routing
29 | - `compute.tf` - EC2, Auto Scaling, and Load Balancers
30 | - `storage.tf` - S3 buckets and EFS resources
31 | - `database.tf` - RDS PostgreSQL and ElastiCache Redis
32 | - `security.tf` - Security groups and encryption keys
33 | - `dns.tf` - Route53 and ACM certificates
34 | - `outputs.tf` - Output values from the infrastructure
35 | - `analyze_terraform_deps.py` - Demo script using Kit's TerraformDependencyAnalyzer
36 | 
37 | ## Running the Demo
38 | 
39 | To run the demo and analyze the Terraform configuration:
40 | 
41 | ```bash
42 | # Create output directory
43 | mkdir -p output
44 | 
45 | # Run basic analysis
46 | python analyze_terraform_deps.py
47 | 
48 | # Run with visualization
49 | python analyze_terraform_deps.py --visualize
50 | 
51 | # Generate LLM-friendly context
52 | python analyze_terraform_deps.py --llm-context
53 | 
54 | # Export in different formats
55 | python analyze_terraform_deps.py --format json
56 | python analyze_terraform_deps.py --format graphml
57 | 
58 | # Run all options
59 | python analyze_terraform_deps.py --visualize --llm-context --format dot
60 | ```
61 | 
62 | ## Key Features Demonstrated
63 | 
64 | This demo showcases how Kit's TerraformDependencyAnalyzer can:
65 | 
66 | 1. **Map Resource Relationships**: Identify which resources depend on others, both directly and indirectly.
67 | 
68 | 2. **Detect Potential Issues**: Find circular dependencies that might cause deployment problems.
69 | 
70 | 3. **Visualize Architecture**: Generate visual representations of your infrastructure.
71 | 
72 | 4. **Generate Documentation**: Create LLM-friendly context that describes the infrastructure in natural language.
73 | 
74 | 5. **Identify Central Components**: Find the most interconnected resources in your infrastructure.
75 | 
76 | ## Implementation Details
77 | 
78 | The analyzer uses HCL parsing to extract dependencies from Terraform files and builds a graph representation of the infrastructure. It can identify dependencies through:
79 | 
80 | - Direct references (e.g., `aws_instance.example.id`)
81 | - String interpolations (e.g., `"${aws_vpc.main.id}"`)
82 | - Resource attributes (e.g., within a resource's configuration)
83 | 
84 | The demo infrastructure is designed to have realistic dependencies that showcase the analyzer's capabilities, including resource references across different files and modules.
85 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/database.tf:
--------------------------------------------------------------------------------
  1 | # Database resources including RDS instances and ElastiCache
  2 | 
  3 | # RDS Subnet Group
  4 | resource "aws_db_subnet_group" "main" {
  5 |   name       = "${var.project_name}-db-subnet-group"
  6 |   subnet_ids = aws_subnet.database[*].id
  7 | 
  8 |   tags = {
  9 |     Name = "${var.project_name}-db-subnet-group"
 10 |   }
 11 | }
 12 | 
 13 | # RDS PostgreSQL Instance
 14 | resource "aws_db_instance" "main" {
 15 |   identifier             = "${var.project_name}-db"
 16 |   engine                 = "postgres"
 17 |   engine_version         = "14.7"
 18 |   instance_class         = var.db_instance_class
 19 |   allocated_storage      = 20
 20 |   max_allocated_storage  = 100
 21 |   storage_type           = "gp3"
 22 |   storage_encrypted      = true
 23 |   kms_key_id             = aws_kms_key.app.arn
 24 |   db_name                = var.db_name
 25 |   username               = var.db_username
 26 |   password               = var.db_password
 27 |   db_subnet_group_name   = aws_db_subnet_group.main.name
 28 |   vpc_security_group_ids = [aws_security_group.db.id]
 29 |   publicly_accessible    = false
 30 |   skip_final_snapshot    = true
 31 |   apply_immediately      = true
 32 |   deletion_protection    = false
 33 | 
 34 |   backup_retention_period = 7
 35 |   backup_window           = "03:00-04:00"
 36 |   maintenance_window      = "mon:04:00-mon:05:00"
 37 | 
 38 |   performance_insights_enabled          = true
 39 |   performance_insights_retention_period = 7
 40 |   performance_insights_kms_key_id       = aws_kms_key.app.arn
 41 | 
 42 |   tags = {
 43 |     Name = "${var.project_name}-db"
 44 |   }
 45 | }
 46 | 
 47 | # RDS Read Replica for high availability
 48 | resource "aws_db_instance" "replica" {
 49 |   identifier             = "${var.project_name}-db-replica"
 50 |   instance_class         = var.db_instance_class
 51 |   replicate_source_db    = aws_db_instance.main.identifier
 52 |   vpc_security_group_ids = [aws_security_group.db.id]
 53 |   publicly_accessible    = false
 54 |   skip_final_snapshot    = true
 55 |   apply_immediately      = true
 56 | 
 57 |   storage_encrypted      = true
 58 |   kms_key_id             = aws_kms_key.app.arn
 59 | 
 60 |   performance_insights_enabled          = true
 61 |   performance_insights_retention_period = 7
 62 |   performance_insights_kms_key_id       = aws_kms_key.app.arn
 63 | 
 64 |   tags = {
 65 |     Name = "${var.project_name}-db-replica"
 66 |   }
 67 | }
 68 | 
 69 | # ElastiCache Redis Subnet Group
 70 | resource "aws_elasticache_subnet_group" "main" {
 71 |   name       = "${var.project_name}-cache-subnet-group"
 72 |   subnet_ids = aws_subnet.private[*].id
 73 | 
 74 |   tags = {
 75 |     Name = "${var.project_name}-cache-subnet-group"
 76 |   }
 77 | }
 78 | 
 79 | # ElastiCache Redis Parameter Group
 80 | resource "aws_elasticache_parameter_group" "main" {
 81 |   name   = "${var.project_name}-cache-params"
 82 |   family = "redis6.x"
 83 | 
 84 |   parameter {
 85 |     name  = "maxmemory-policy"
 86 |     value = "allkeys-lru"
 87 |   }
 88 | 
 89 |   tags = {
 90 |     Name = "${var.project_name}-cache-params"
 91 |   }
 92 | }
 93 | 
 94 | # ElastiCache Redis Cluster
 95 | resource "aws_elasticache_replication_group" "main" {
 96 |   replication_group_id       = "${var.project_name}-cache"
 97 |   description                = "Redis cache for ${var.project_name}"
 98 |   node_type                  = "cache.t3.small"
 99 |   port                       = 6379
100 |   parameter_group_name       = aws_elasticache_parameter_group.main.name
101 |   subnet_group_name          = aws_elasticache_subnet_group.main.name
102 |   security_group_ids         = [aws_security_group.cache.id]
103 |   automatic_failover_enabled = true
104 |   num_cache_clusters         = 2
105 |   at_rest_encryption_enabled = true
106 |   transit_encryption_enabled = true
107 |   kms_key_id                 = aws_kms_key.app.arn
108 | 
109 |   tags = {
110 |     Name = "${var.project_name}-cache"
111 |   }
112 | }
113 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/dns.tf:
--------------------------------------------------------------------------------
 1 | # DNS configuration resources using Route53
 2 | 
 3 | # Route53 Zone (assuming it's already created)
 4 | data "aws_route53_zone" "main" {
 5 |   name = var.domain_name
 6 | }
 7 | 
 8 | # ACM Certificate for HTTPS
 9 | resource "aws_acm_certificate" "cert" {
10 |   domain_name       = var.domain_name
11 |   validation_method = "DNS"
12 | 
13 |   subject_alternative_names = [
14 |     "*.${var.domain_name}"
15 |   ]
16 | 
17 |   lifecycle {
18 |     create_before_destroy = true
19 |   }
20 | 
21 |   tags = {
22 |     Name = "${var.project_name}-certificate"
23 |   }
24 | }
25 | 
26 | # DNS Validation Records
27 | resource "aws_route53_record" "cert_validation" {
28 |   for_each = {
29 |     for dvo in aws_acm_certificate.cert.domain_validation_options : dvo.domain_name => {
30 |       name   = dvo.resource_record_name
31 |       record = dvo.resource_record_value
32 |       type   = dvo.resource_record_type
33 |     }
34 |   }
35 | 
36 |   allow_overwrite = true
37 |   name            = each.value.name
38 |   records         = [each.value.record]
39 |   ttl             = 60
40 |   type            = each.value.type
41 |   zone_id         = data.aws_route53_zone.main.zone_id
42 | }
43 | 
44 | # Certificate validation
45 | resource "aws_acm_certificate_validation" "cert" {
46 |   certificate_arn         = aws_acm_certificate.cert.arn
47 |   validation_record_fqdns = [for record in aws_route53_record.cert_validation : record.fqdn]
48 | }
49 | 
50 | # Application DNS record
51 | resource "aws_route53_record" "app" {
52 |   zone_id = data.aws_route53_zone.main.zone_id
53 |   name    = var.domain_name
54 |   type    = "A"
55 | 
56 |   alias {
57 |     name                   = aws_lb.app.dns_name
58 |     zone_id                = aws_lb.app.zone_id
59 |     evaluate_target_health = true
60 |   }
61 | }
62 | 
63 | # Wildcard DNS record
64 | resource "aws_route53_record" "wildcard" {
65 |   zone_id = data.aws_route53_zone.main.zone_id
66 |   name    = "*.${var.domain_name}"
67 |   type    = "A"
68 | 
69 |   alias {
70 |     name                   = aws_lb.app.dns_name
71 |     zone_id                = aws_lb.app.zone_id
72 |     evaluate_target_health = true
73 |   }
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/main.tf:
--------------------------------------------------------------------------------
 1 | # Main Terraform configuration file
 2 | # Sets up AWS provider and basic infrastructure
 3 | 
 4 | terraform {
 5 |   required_providers {
 6 |     aws = {
 7 |       source  = "hashicorp/aws"
 8 |       version = "~> 4.0"
 9 |     }
10 |   }
11 |   required_version = ">= 1.0.0"
12 | }
13 | 
14 | provider "aws" {
15 |   region = var.aws_region
16 |   default_tags {
17 |     tags = {
18 |       Project     = var.project_name
19 |       Environment = var.environment
20 |       ManagedBy   = "Terraform"
21 |     }
22 |   }
23 | }
24 | 
25 | # VPC and networking are defined in network.tf
26 | # S3 and storage components are defined in storage.tf
27 | # Compute resources are defined in compute.tf
28 | # Database resources are defined in database.tf
29 | # Security components are defined in security.tf
30 | # Outputs are defined in outputs.tf
31 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/network.tf:
--------------------------------------------------------------------------------
  1 | # Network infrastructure components
  2 | 
  3 | # VPC
  4 | resource "aws_vpc" "main" {
  5 |   cidr_block           = var.vpc_cidr
  6 |   enable_dns_support   = true
  7 |   enable_dns_hostnames = true
  8 | 
  9 |   tags = {
 10 |     Name = "${var.project_name}-vpc"
 11 |   }
 12 | }
 13 | 
 14 | # Internet Gateway
 15 | resource "aws_internet_gateway" "main" {
 16 |   vpc_id = aws_vpc.main.id
 17 | 
 18 |   tags = {
 19 |     Name = "${var.project_name}-igw"
 20 |   }
 21 | }
 22 | 
 23 | # Public Subnets
 24 | resource "aws_subnet" "public" {
 25 |   count             = length(var.public_subnet_cidrs)
 26 |   vpc_id            = aws_vpc.main.id
 27 |   cidr_block        = var.public_subnet_cidrs[count.index]
 28 |   availability_zone = data.aws_availability_zones.available.names[count.index]
 29 |   map_public_ip_on_launch = true
 30 | 
 31 |   tags = {
 32 |     Name = "${var.project_name}-public-subnet-${count.index + 1}"
 33 |   }
 34 | }
 35 | 
 36 | # Private Subnets
 37 | resource "aws_subnet" "private" {
 38 |   count             = length(var.private_subnet_cidrs)
 39 |   vpc_id            = aws_vpc.main.id
 40 |   cidr_block        = var.private_subnet_cidrs[count.index]
 41 |   availability_zone = data.aws_availability_zones.available.names[count.index]
 42 | 
 43 |   tags = {
 44 |     Name = "${var.project_name}-private-subnet-${count.index + 1}"
 45 |   }
 46 | }
 47 | 
 48 | # Database Subnets
 49 | resource "aws_subnet" "database" {
 50 |   count             = length(var.db_subnet_cidrs)
 51 |   vpc_id            = aws_vpc.main.id
 52 |   cidr_block        = var.db_subnet_cidrs[count.index]
 53 |   availability_zone = data.aws_availability_zones.available.names[count.index]
 54 | 
 55 |   tags = {
 56 |     Name = "${var.project_name}-db-subnet-${count.index + 1}"
 57 |   }
 58 | }
 59 | 
 60 | # Elastic IP for NAT Gateway
 61 | resource "aws_eip" "nat" {
 62 |   domain = "vpc"
 63 |   depends_on = [aws_internet_gateway.main]
 64 | 
 65 |   tags = {
 66 |     Name = "${var.project_name}-nat-eip"
 67 |   }
 68 | }
 69 | 
 70 | # NAT Gateway
 71 | resource "aws_nat_gateway" "main" {
 72 |   allocation_id = aws_eip.nat.id
 73 |   subnet_id     = aws_subnet.public[0].id
 74 |   depends_on    = [aws_internet_gateway.main]
 75 | 
 76 |   tags = {
 77 |     Name = "${var.project_name}-nat"
 78 |   }
 79 | }
 80 | 
 81 | # Route Tables
 82 | resource "aws_route_table" "public" {
 83 |   vpc_id = aws_vpc.main.id
 84 | 
 85 |   route {
 86 |     cidr_block = "0.0.0.0/0"
 87 |     gateway_id = aws_internet_gateway.main.id
 88 |   }
 89 | 
 90 |   tags = {
 91 |     Name = "${var.project_name}-public-route-table"
 92 |   }
 93 | }
 94 | 
 95 | resource "aws_route_table" "private" {
 96 |   vpc_id = aws_vpc.main.id
 97 | 
 98 |   route {
 99 |     cidr_block     = "0.0.0.0/0"
100 |     nat_gateway_id = aws_nat_gateway.main.id
101 |   }
102 | 
103 |   tags = {
104 |     Name = "${var.project_name}-private-route-table"
105 |   }
106 | }
107 | 
108 | # Route Table Associations
109 | resource "aws_route_table_association" "public" {
110 |   count          = length(var.public_subnet_cidrs)
111 |   subnet_id      = aws_subnet.public[count.index].id
112 |   route_table_id = aws_route_table.public.id
113 | }
114 | 
115 | resource "aws_route_table_association" "private" {
116 |   count          = length(var.private_subnet_cidrs)
117 |   subnet_id      = aws_subnet.private[count.index].id
118 |   route_table_id = aws_route_table.private.id
119 | }
120 | 
121 | # Network ACLs
122 | resource "aws_network_acl" "public" {
123 |   vpc_id     = aws_vpc.main.id
124 |   subnet_ids = aws_subnet.public[*].id
125 | 
126 |   ingress {
127 |     protocol   = -1
128 |     rule_no    = 100
129 |     action     = "allow"
130 |     cidr_block = "0.0.0.0/0"
131 |     from_port  = 0
132 |     to_port    = 0
133 |   }
134 | 
135 |   egress {
136 |     protocol   = -1
137 |     rule_no    = 100
138 |     action     = "allow"
139 |     cidr_block = "0.0.0.0/0"
140 |     from_port  = 0
141 |     to_port    = 0
142 |   }
143 | 
144 |   tags = {
145 |     Name = "${var.project_name}-public-nacl"
146 |   }
147 | }
148 | 
149 | resource "aws_network_acl" "private" {
150 |   vpc_id     = aws_vpc.main.id
151 |   subnet_ids = aws_subnet.private[*].id
152 | 
153 |   ingress {
154 |     protocol   = -1
155 |     rule_no    = 100
156 |     action     = "allow"
157 |     cidr_block = var.vpc_cidr
158 |     from_port  = 0
159 |     to_port    = 0
160 |   }
161 | 
162 |   egress {
163 |     protocol   = -1
164 |     rule_no    = 100
165 |     action     = "allow"
166 |     cidr_block = "0.0.0.0/0"
167 |     from_port  = 0
168 |     to_port    = 0
169 |   }
170 | 
171 |   tags = {
172 |     Name = "${var.project_name}-private-nacl"
173 |   }
174 | }
175 | 
176 | # Data source for availability zones
177 | data "aws_availability_zones" "available" {
178 |   state = "available"
179 | }
180 | 
181 | # Security groups are defined in security.tf
182 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/outputs.tf:
--------------------------------------------------------------------------------
 1 | # Output values from the infrastructure
 2 | 
 3 | output "vpc_id" {
 4 |   description = "ID of the VPC"
 5 |   value       = aws_vpc.main.id
 6 | }
 7 | 
 8 | output "public_subnets" {
 9 |   description = "List of public subnet IDs"
10 |   value       = aws_subnet.public[*].id
11 | }
12 | 
13 | output "private_subnets" {
14 |   description = "List of private subnet IDs"
15 |   value       = aws_subnet.private[*].id
16 | }
17 | 
18 | output "database_subnets" {
19 |   description = "List of database subnet IDs"
20 |   value       = aws_subnet.database[*].id
21 | }
22 | 
23 | output "alb_dns_name" {
24 |   description = "DNS name of the load balancer"
25 |   value       = aws_lb.app.dns_name
26 | }
27 | 
28 | output "domain_name" {
29 |   description = "Domain name of the application"
30 |   value       = var.domain_name
31 | }
32 | 
33 | output "bastion_public_ip" {
34 |   description = "Public IP address of the bastion host"
35 |   value       = aws_instance.bastion.public_ip
36 | }
37 | 
38 | output "db_endpoint" {
39 |   description = "Endpoint for the primary database"
40 |   value       = aws_db_instance.main.endpoint
41 | }
42 | 
43 | output "db_replica_endpoint" {
44 |   description = "Endpoint for the database replica"
45 |   value       = aws_db_instance.replica.endpoint
46 | }
47 | 
48 | output "redis_endpoint" {
49 |   description = "Endpoint for the Redis cluster"
50 |   value       = aws_elasticache_replication_group.main.primary_endpoint_address
51 | }
52 | 
53 | output "app_bucket_name" {
54 |   description = "Name of the S3 bucket for application data"
55 |   value       = aws_s3_bucket.app_data.bucket
56 | }
57 | 
58 | output "logs_bucket_name" {
59 |   description = "Name of the S3 bucket for logs"
60 |   value       = var.enable_logging_bucket ? aws_s3_bucket.logs[0].bucket : "Logging bucket not enabled"
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/security.tf:
--------------------------------------------------------------------------------
  1 | # Security related resources
  2 | 
  3 | # Application Load Balancer Security Group
  4 | resource "aws_security_group" "alb" {
  5 |   name        = "${var.project_name}-alb-sg"
  6 |   description = "Security group for the application load balancer"
  7 |   vpc_id      = aws_vpc.main.id
  8 | 
  9 |   ingress {
 10 |     description = "HTTP from anywhere"
 11 |     from_port   = 80
 12 |     to_port     = 80
 13 |     protocol    = "tcp"
 14 |     cidr_blocks = ["0.0.0.0/0"]
 15 |   }
 16 | 
 17 |   ingress {
 18 |     description = "HTTPS from anywhere"
 19 |     from_port   = 443
 20 |     to_port     = 443
 21 |     protocol    = "tcp"
 22 |     cidr_blocks = ["0.0.0.0/0"]
 23 |   }
 24 | 
 25 |   egress {
 26 |     from_port   = 0
 27 |     to_port     = 0
 28 |     protocol    = "-1"
 29 |     cidr_blocks = ["0.0.0.0/0"]
 30 |   }
 31 | 
 32 |   tags = {
 33 |     Name = "${var.project_name}-alb-sg"
 34 |   }
 35 | }
 36 | 
 37 | # Web Server Security Group
 38 | resource "aws_security_group" "web" {
 39 |   name        = "${var.project_name}-web-sg"
 40 |   description = "Security group for web servers"
 41 |   vpc_id      = aws_vpc.main.id
 42 | 
 43 |   ingress {
 44 |     description     = "HTTP from ALB"
 45 |     from_port       = 80
 46 |     to_port         = 80
 47 |     protocol        = "tcp"
 48 |     security_groups = [aws_security_group.alb.id]
 49 |   }
 50 | 
 51 |   ingress {
 52 |     description     = "HTTPS from ALB"
 53 |     from_port       = 443
 54 |     to_port         = 443
 55 |     protocol        = "tcp"
 56 |     security_groups = [aws_security_group.alb.id]
 57 |   }
 58 | 
 59 |   ingress {
 60 |     description = "SSH from VPC"
 61 |     from_port   = 22
 62 |     to_port     = 22
 63 |     protocol    = "tcp"
 64 |     cidr_blocks = [var.vpc_cidr]
 65 |   }
 66 | 
 67 |   egress {
 68 |     from_port   = 0
 69 |     to_port     = 0
 70 |     protocol    = "-1"
 71 |     cidr_blocks = ["0.0.0.0/0"]
 72 |   }
 73 | 
 74 |   tags = {
 75 |     Name = "${var.project_name}-web-sg"
 76 |   }
 77 | }
 78 | 
 79 | # Database Security Group
 80 | resource "aws_security_group" "db" {
 81 |   name        = "${var.project_name}-db-sg"
 82 |   description = "Security group for database instances"
 83 |   vpc_id      = aws_vpc.main.id
 84 | 
 85 |   ingress {
 86 |     description     = "Database access from web servers"
 87 |     from_port       = 5432
 88 |     to_port         = 5432
 89 |     protocol        = "tcp"
 90 |     security_groups = [aws_security_group.web.id]
 91 |   }
 92 | 
 93 |   egress {
 94 |     from_port   = 0
 95 |     to_port     = 0
 96 |     protocol    = "-1"
 97 |     cidr_blocks = ["0.0.0.0/0"]
 98 |   }
 99 | 
100 |   tags = {
101 |     Name = "${var.project_name}-db-sg"
102 |   }
103 | }
104 | 
105 | # Cache Security Group
106 | resource "aws_security_group" "cache" {
107 |   name        = "${var.project_name}-cache-sg"
108 |   description = "Security group for ElastiCache"
109 |   vpc_id      = aws_vpc.main.id
110 | 
111 |   ingress {
112 |     description     = "Redis access from web servers"
113 |     from_port       = 6379
114 |     to_port         = 6379
115 |     protocol        = "tcp"
116 |     security_groups = [aws_security_group.web.id]
117 |   }
118 | 
119 |   egress {
120 |     from_port   = 0
121 |     to_port     = 0
122 |     protocol    = "-1"
123 |     cidr_blocks = ["0.0.0.0/0"]
124 |   }
125 | 
126 |   tags = {
127 |     Name = "${var.project_name}-cache-sg"
128 |   }
129 | }
130 | 
131 | # Bastion Host Security Group
132 | resource "aws_security_group" "bastion" {
133 |   name        = "${var.project_name}-bastion-sg"
134 |   description = "Security group for the bastion host"
135 |   vpc_id      = aws_vpc.main.id
136 | 
137 |   ingress {
138 |     description = "SSH from anywhere"
139 |     from_port   = 22
140 |     to_port     = 22
141 |     protocol    = "tcp"
142 |     cidr_blocks = ["0.0.0.0/0"]
143 |   }
144 | 
145 |   egress {
146 |     from_port   = 0
147 |     to_port     = 0
148 |     protocol    = "-1"
149 |     cidr_blocks = ["0.0.0.0/0"]
150 |   }
151 | 
152 |   tags = {
153 |     Name = "${var.project_name}-bastion-sg"
154 |   }
155 | }
156 | 
157 | # KMS Key for encryption
158 | resource "aws_kms_key" "app" {
159 |   description             = "KMS key for application encryption"
160 |   deletion_window_in_days = 10
161 |   enable_key_rotation     = true
162 | 
163 |   tags = {
164 |     Name = "${var.project_name}-kms-key"
165 |   }
166 | }
167 | 
168 | resource "aws_kms_alias" "app" {
169 |   name          = "alias/${var.project_name}-key"
170 |   target_key_id = aws_kms_key.app.key_id
171 | }
172 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/storage.tf:
--------------------------------------------------------------------------------
  1 | # Storage resources including S3 buckets and EFS file systems
  2 | 
  3 | # Application data bucket
  4 | resource "aws_s3_bucket" "app_data" {
  5 |   bucket = "${var.project_name}-app-data-${random_id.bucket_suffix.hex}"
  6 | 
  7 |   tags = {
  8 |     Name = "${var.project_name}-app-data"
  9 |   }
 10 | }
 11 | 
 12 | resource "aws_s3_bucket_versioning" "app_data_versioning" {
 13 |   bucket = aws_s3_bucket.app_data.id
 14 |   versioning_configuration {
 15 |     status = "Enabled"
 16 |   }
 17 | }
 18 | 
 19 | resource "aws_s3_bucket_server_side_encryption_configuration" "app_data_encryption" {
 20 |   bucket = aws_s3_bucket.app_data.id
 21 | 
 22 |   rule {
 23 |     apply_server_side_encryption_by_default {
 24 |       kms_master_key_id = aws_kms_key.app.arn
 25 |       sse_algorithm     = "aws:kms"
 26 |     }
 27 |   }
 28 | }
 29 | 
 30 | # Logging bucket
 31 | resource "aws_s3_bucket" "logs" {
 32 |   count  = var.enable_logging_bucket ? 1 : 0
 33 |   bucket = "${var.project_name}-logs-${random_id.bucket_suffix.hex}"
 34 | 
 35 |   tags = {
 36 |     Name = "${var.project_name}-logs"
 37 |   }
 38 | }
 39 | 
 40 | resource "aws_s3_bucket_lifecycle_configuration" "logs_lifecycle" {
 41 |   count  = var.enable_logging_bucket ? 1 : 0
 42 |   bucket = aws_s3_bucket.logs[0].id
 43 | 
 44 |   rule {
 45 |     id     = "log-rotation"
 46 |     status = "Enabled"
 47 | 
 48 |     transition {
 49 |       days          = 30
 50 |       storage_class = "STANDARD_IA"
 51 |     }
 52 | 
 53 |     transition {
 54 |       days          = 90
 55 |       storage_class = "GLACIER"
 56 |     }
 57 | 
 58 |     expiration {
 59 |       days = 365
 60 |     }
 61 |   }
 62 | }
 63 | 
 64 | # EFS file system for shared application files
 65 | resource "aws_efs_file_system" "app_files" {
 66 |   encrypted  = true
 67 |   kms_key_id = aws_kms_key.app.arn
 68 | 
 69 |   lifecycle_policy {
 70 |     transition_to_ia = "AFTER_30_DAYS"
 71 |   }
 72 | 
 73 |   tags = {
 74 |     Name = "${var.project_name}-app-files"
 75 |   }
 76 | }
 77 | 
 78 | resource "aws_efs_mount_target" "app_files" {
 79 |   count           = length(var.private_subnet_cidrs)
 80 |   file_system_id  = aws_efs_file_system.app_files.id
 81 |   subnet_id       = aws_subnet.private[count.index].id
 82 |   security_groups = [aws_security_group.efs.id]
 83 | }
 84 | 
 85 | resource "aws_security_group" "efs" {
 86 |   name        = "${var.project_name}-efs-sg"
 87 |   description = "Security group for EFS mount points"
 88 |   vpc_id      = aws_vpc.main.id
 89 | 
 90 |   ingress {
 91 |     description     = "NFS from web servers"
 92 |     from_port       = 2049
 93 |     to_port         = 2049
 94 |     protocol        = "tcp"
 95 |     security_groups = [aws_security_group.web.id]
 96 |   }
 97 | 
 98 |   egress {
 99 |     from_port   = 0
100 |     to_port     = 0
101 |     protocol    = "-1"
102 |     cidr_blocks = ["0.0.0.0/0"]
103 |   }
104 | 
105 |   tags = {
106 |     Name = "${var.project_name}-efs-sg"
107 |   }
108 | }
109 | 
110 | # Random ID for unique bucket names
111 | resource "random_id" "bucket_suffix" {
112 |   byte_length = 4
113 | }
114 | 


--------------------------------------------------------------------------------
/tests/examples/terraform_dependency_analysis/variables.tf:
--------------------------------------------------------------------------------
  1 | variable "aws_region" {
  2 |   description = "AWS region to deploy resources"
  3 |   type        = string
  4 |   default     = "us-west-2"
  5 | }
  6 | 
  7 | variable "project_name" {
  8 |   description = "Name of the project"
  9 |   type        = string
 10 |   default     = "terraform-analyzer-demo"
 11 | }
 12 | 
 13 | variable "environment" {
 14 |   description = "Environment (dev, staging, prod)"
 15 |   type        = string
 16 |   default     = "dev"
 17 | }
 18 | 
 19 | variable "vpc_cidr" {
 20 |   description = "CIDR block for the VPC"
 21 |   type        = string
 22 |   default     = "10.0.0.0/16"
 23 | }
 24 | 
 25 | variable "public_subnet_cidrs" {
 26 |   description = "CIDR blocks for the public subnets"
 27 |   type        = list(string)
 28 |   default     = ["10.0.1.0/24", "10.0.2.0/24"]
 29 | }
 30 | 
 31 | variable "private_subnet_cidrs" {
 32 |   description = "CIDR blocks for the private subnets"
 33 |   type        = list(string)
 34 |   default     = ["10.0.3.0/24", "10.0.4.0/24"]
 35 | }
 36 | 
 37 | variable "db_subnet_cidrs" {
 38 |   description = "CIDR blocks for the database subnets"
 39 |   type        = list(string)
 40 |   default     = ["10.0.5.0/24", "10.0.6.0/24"]
 41 | }
 42 | 
 43 | variable "instance_type" {
 44 |   description = "EC2 instance type"
 45 |   type        = string
 46 |   default     = "t3.micro"
 47 | }
 48 | 
 49 | variable "db_instance_class" {
 50 |   description = "RDS instance class"
 51 |   type        = string
 52 |   default     = "db.t3.small"
 53 | }
 54 | 
 55 | variable "db_name" {
 56 |   description = "Database name"
 57 |   type        = string
 58 |   default     = "appdb"
 59 | }
 60 | 
 61 | variable "db_username" {
 62 |   description = "Database master username"
 63 |   type        = string
 64 |   default     = "dbadmin"
 65 | }
 66 | 
 67 | # In a real scenario, you would use a secret manager or pass this as a parameter
 68 | variable "db_password" {
 69 |   description = "Database master password"
 70 |   type        = string
 71 |   default     = "YourPwdShouldBeDynamic!"
 72 |   sensitive   = true
 73 | }
 74 | 
 75 | # Domain configuration
 76 | variable "domain_name" {
 77 |   description = "Main domain name for the application"
 78 |   type        = string
 79 |   default     = "example.com"
 80 | }
 81 | 
 82 | # Bucket configuration
 83 | variable "enable_logging_bucket" {
 84 |   description = "Whether to create an S3 bucket for logs"
 85 |   type        = bool
 86 |   default     = true
 87 | }
 88 | 
 89 | # Auto scaling configuration
 90 | variable "min_size" {
 91 |   description = "Minimum size for the auto scaling group"
 92 |   type        = number
 93 |   default     = 2
 94 | }
 95 | 
 96 | variable "max_size" {
 97 |   description = "Maximum size for the auto scaling group"
 98 |   type        = number
 99 |   default     = 5
100 | }
101 | 
102 | variable "desired_capacity" {
103 |   description = "Desired capacity for the auto scaling group"
104 |   type        = number
105 |   default     = 2
106 | }
107 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/__init__.py:
--------------------------------------------------------------------------------
1 | """Realistic fixture repository for integration tests."""
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/app.py:
--------------------------------------------------------------------------------
  1 | """Entry point of the realistic fixture repo.
  2 | Demonstrates expanded usage of AuthService, User model, and utils."""
  3 | 
  4 | from services.auth import AuthService
  5 | from utils import greet
  6 | 
  7 | 
  8 | def main() -> None:
  9 |     """Run an expanded demo workflow showcasing more features."""
 10 |     print("Kit Realistic Repo Demo\n" + "=" * 30)
 11 | 
 12 |     auth_service = AuthService()
 13 | 
 14 |     # 1. Register users
 15 |     print("\n--- Registering Users ---")
 16 |     user_alice_details = {"username": "alice", "email": "alice@example.com", "password": "password"}
 17 |     user_bob_details = {"username": "bob", "email": "bob@example.com", "password": "password"}
 18 | 
 19 |     alice = auth_service.register_user(**user_alice_details)
 20 |     if alice:
 21 |         print(f"Registered: {alice.display()}")
 22 |     else:
 23 |         print(f"Failed to register {user_alice_details['username']}")
 24 | 
 25 |     bob = auth_service.register_user(**user_bob_details)
 26 |     if bob:
 27 |         print(f"Registered: {bob.display()}")
 28 |     else:
 29 |         print(f"Failed to register {user_bob_details['username']}")
 30 | 
 31 |     # Attempt to register Alice again (should fail)
 32 |     print("\n--- Attempting to re-register Alice ---")
 33 |     alice_again = auth_service.register_user(**user_alice_details)
 34 |     if not alice_again:
 35 |         print(f"Successfully prevented re-registration of {user_alice_details['username']}.")
 36 |     else:
 37 |         print(f"ERROR: Re-registration of {user_alice_details['username']} was allowed.")
 38 | 
 39 |     # 2. Login Alice
 40 |     print("\n--- Logging in Alice ---")
 41 |     alice_token = auth_service.login(username="alice", password="password")
 42 |     if alice_token:
 43 |         print(f"Alice logged in. Token: {alice_token[:8]}...")
 44 |         assert auth_service.is_valid_token(alice_token), "Alice's token should be valid"
 45 |         print("Token is valid.")
 46 | 
 47 |         # 3. Get user from token and demonstrate User methods
 48 |         current_user = auth_service.get_user_from_token(alice_token)
 49 |         if current_user:
 50 |             print(f"\n--- User details for {current_user.name} (from token) ---")
 51 |             print(f"Initial display: {current_user.display()}")
 52 |             print(f"Last login timestamp: {current_user.last_login_at}")
 53 | 
 54 |             new_email = "alice.updated@example.com"
 55 |             print(f"Attempting to update email to: {new_email}")
 56 |             if current_user.update_email(new_email):
 57 |                 print(f"Email updated. New display: {current_user.display()}")
 58 |             else:
 59 |                 print(f"Failed to update email to {new_email}.")
 60 | 
 61 |             current_user.set_preference("theme", "dark_mode_pro")
 62 |             current_user.set_preference("notifications_level", "critical")
 63 |             print(f"Preferences set. Theme: {current_user.get_preference('theme')}")
 64 |             print(f"All preferences: {current_user.preferences}")
 65 |         else:
 66 |             print("Could not retrieve user from Alice's token.")
 67 | 
 68 |         # 4. Logout Alice
 69 |         print("\n--- Logging out Alice ---")
 70 |         auth_service.logout(alice_token)
 71 |         assert not auth_service.is_valid_token(alice_token), "Alice's token should be invalid after logout"
 72 |         print("Alice logged out. Token is now invalid.")
 73 |     else:
 74 |         print("Alice login failed.")
 75 | 
 76 |     # 5. Demonstrate deactivation with Bob
 77 |     print("\n--- Operations with Bob ---")
 78 |     bob_token = auth_service.login(username="bob", password="password")
 79 |     if bob_token and bob:
 80 |         print(f"Bob logged in. Token: {bob_token[:8]}...")
 81 |         retrieved_bob = auth_service.get_user_from_token(bob_token)
 82 |         if retrieved_bob:
 83 |             print(f"Deactivating Bob ({retrieved_bob.name})...")
 84 |             retrieved_bob.deactivate()
 85 |             print(f"Bob's account status: {'active' if retrieved_bob.is_active else 'inactive'}")
 86 | 
 87 |             print("Attempting to log in deactivated Bob...")
 88 |             bob_deactivated_token = auth_service.login(username="bob", password="password")
 89 |             if not bob_deactivated_token:
 90 |                 print("Login for deactivated Bob correctly failed.")
 91 |             else:
 92 |                 print("ERROR: Deactivated Bob was able to log in.")
 93 |         else:
 94 |             print("Could not retrieve Bob from token for deactivation.")
 95 | 
 96 |         # Logout Bob if he was logged in
 97 |         auth_service.logout(bob_token)
 98 |         print("Bob logged out.")
 99 | 
100 |     else:
101 |         print("Bob login failed or Bob object not found.")
102 | 
103 |     print(f"\n{greet('Developer')}! Demo finished.")
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     main()
108 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/models/user.py:
--------------------------------------------------------------------------------
 1 | """User dataclass for the realistic fixture repo."""
 2 | 
 3 | import time
 4 | from dataclasses import dataclass, field
 5 | from typing import Any, Dict, Optional
 6 | 
 7 | from ..utils import is_valid_email
 8 | 
 9 | 
10 | @dataclass
11 | class User:
12 |     id: int
13 |     name: str
14 |     email: str
15 |     is_active: bool = True
16 |     created_at: float = field(default_factory=time.time)
17 |     last_login_at: Optional[float] = None
18 |     preferences: Dict[str, Any] = field(default_factory=dict)
19 | 
20 |     def display(self) -> str:
21 |         """Return a human readable representation."""
22 |         status = "active" if self.is_active else "inactive"
23 |         return f"<{self.id}> {self.name} <{self.email}> ({status})"
24 | 
25 |     def update_email(self, new_email: str) -> bool:
26 |         """Updates the user's email address after validating it.
27 | 
28 |         Args:
29 |             new_email: The new email address.
30 | 
31 |         Returns:
32 |             True if the email was updated, False otherwise.
33 |         """
34 |         if is_valid_email(new_email):
35 |             self.email = new_email
36 |             return True
37 |         return False
38 | 
39 |     def deactivate(self) -> None:
40 |         """Deactivates the user's account."""
41 |         self.is_active = False
42 |         print(f"User {self.name} deactivated.")
43 | 
44 |     def record_login(self) -> None:
45 |         """Updates the last_login_at timestamp to the current time."""
46 |         self.last_login_at = time.time()
47 | 
48 |     def set_preference(self, key: str, value: Any) -> None:
49 |         """Sets a user preference.
50 | 
51 |         Args:
52 |             key: The preference key.
53 |             value: The preference value.
54 |         """
55 |         self.preferences[key] = value
56 | 
57 |     def get_preference(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
58 |         """Gets a user preference.
59 | 
60 |         Args:
61 |             key: The preference key.
62 |             default: The default value to return if the key is not found.
63 | 
64 |         Returns:
65 |             The preference value or the default.
66 |         """
67 |         return self.preferences.get(key, default)
68 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/services/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/services/auth.py:
--------------------------------------------------------------------------------
 1 | """Auth service that issues and validates in-memory tokens, and manages users."""
 2 | 
 3 | import uuid
 4 | from typing import Dict, Optional
 5 | 
 6 | from ..models.user import User
 7 | 
 8 | 
 9 | class AuthService:
10 |     """Manages user registration, login, and session tokens (in-memory demo)."""
11 | 
12 |     def __init__(self):
13 |         self._users_by_username: Dict[str, User] = {}
14 |         self._users_by_id: Dict[int, User] = {}
15 |         self._active_tokens: Dict[str, int] = {}  # token_string -> user_id
16 |         self._next_user_id: int = 1
17 | 
18 |     def register_user(self, username: str, email: str, password: str) -> Optional[User]:
19 |         """Registers a new user.
20 | 
21 |         Args:
22 |             username: The desired username.
23 |             email: The user's email address.
24 |             password: The user's password (ignored for this demo).
25 | 
26 |         Returns:
27 |             The created User object, or None if username is taken.
28 |         """
29 |         if username in self._users_by_username:
30 |             return None  # Username already taken
31 | 
32 |         # In a real app, hash the password!
33 |         user_id = self._next_user_id
34 |         new_user = User(id=user_id, name=username, email=email)
35 |         self._next_user_id += 1
36 | 
37 |         self._users_by_username[username] = new_user
38 |         self._users_by_id[user_id] = new_user
39 |         return new_user
40 | 
41 |     def login(self, *, username: str, password: str) -> Optional[str]:
42 |         """Logs in a user and returns a session token.
43 | 
44 |         Args:
45 |             username: The username.
46 |             password: The password.
47 | 
48 |         Returns:
49 |             A session token string if login is successful, None otherwise.
50 |         """
51 |         user = self._users_by_username.get(username)
52 |         # Demo: check if user exists and password is 'password' (DO NOT use in real life!)
53 |         if user and user.is_active and password == "password":
54 |             token = str(uuid.uuid4())
55 |             self._active_tokens[token] = user.id
56 |             user.record_login()  # Update last_login_at on the User model
57 |             return token
58 |         return None
59 | 
60 |     def logout(self, token: str) -> None:
61 |         """Logs out a user by invalidating their token."""
62 |         if token in self._active_tokens:
63 |             del self._active_tokens[token]
64 | 
65 |     def is_valid_token(self, token: str) -> bool:
66 |         """Checks if a token is currently valid."""
67 |         return token in self._active_tokens
68 | 
69 |     def get_user_from_token(self, token: str) -> Optional[User]:
70 |         """Retrieves the User object associated with a valid token.
71 | 
72 |         Args:
73 |             token: The session token.
74 | 
75 |         Returns:
76 |             The User object if the token is valid, None otherwise.
77 |         """
78 |         user_id = self._active_tokens.get(token)
79 |         if user_id:
80 |             return self._users_by_id.get(user_id)
81 |         return None
82 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/services/db.py:
--------------------------------------------------------------------------------
1 | """In-memory DB connector used by AuthService (toy)."""
2 | 
3 | 
4 | def connect(url: str = "sqlite://:memory:") -> str:
5 |     """Pretend to open a DB connection and return a connection ID."""
6 |     return f"conn-{url}"
7 | 


--------------------------------------------------------------------------------
/tests/fixtures/realistic_repo/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility helpers for the fixture repo."""
 2 | 
 3 | import datetime
 4 | import re
 5 | 
 6 | 
 7 | def greet(name: str) -> str:
 8 |     """Return a friendly greeting."""
 9 |     return f"Hello, {name}!"
10 | 
11 | 
12 | def format_timestamp(ts: float, format_string: str = "%Y-%m-%d %H:%M:%S") -> str:
13 |     """Formats a Unix timestamp into a human-readable string.
14 | 
15 |     Args:
16 |         ts: The Unix timestamp (float).
17 |         format_string: The strftime format string.
18 | 
19 |     Returns:
20 |         A string representing the formatted timestamp.
21 |     """
22 |     dt_object = datetime.datetime.fromtimestamp(ts)
23 |     return dt_object.strftime(format_string)
24 | 
25 | 
26 | def is_valid_email(email: str) -> bool:
27 |     """Checks if the provided string is a valid email address (basic check).
28 | 
29 |     Args:
30 |         email: The email string to validate.
31 | 
32 |     Returns:
33 |         True if the email format is valid, False otherwise.
34 |     """
35 |     # A simple regex for email validation
36 |     pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
37 |     if re.match(pattern, email):
38 |         return True
39 |     return False
40 | 


--------------------------------------------------------------------------------
/tests/golden_go.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import "fmt"
 4 | 
 5 | // User represents a user in the system.
 6 | type User struct {
 7 | 	ID   int
 8 | 	Name string
 9 | }
10 | 
11 | // Greeter defines an interface for greeting.
12 | type Greeter interface {
13 | 	Greet() string
14 | }
15 | 
16 | // Greet implements the Greeter interface for User.
17 | func (u User) Greet() string {
18 | 	return fmt.Sprintf("Hello, my name is %s", u.Name)
19 | }
20 | 
21 | // Add calculates the sum of two integers.
22 | func Add(a, b int) int {
23 | 	return a + b
24 | }
25 | 
26 | // Standalone function
27 | func HelperFunction() {
28 | 	fmt.Println("This is a helper function.")
29 | }
30 | 
31 | func main() {
32 | 	user := User{ID: 1, Name: "Alice"}
33 | 	fmt.Println(user.Greet())
34 | 	fmt.Println(Add(5, 3))
35 | 	HelperFunction()
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/golden_hcl.tf:
--------------------------------------------------------------------------------
 1 | # Basic HCL constructs for symbol extraction testing
 2 | 
 3 | provider "aws" {
 4 |   region = "us-west-2"
 5 | }
 6 | 
 7 | resource "aws_instance" "web_server" {
 8 |   ami           = "ami-0c55b159cbfafe1f0"
 9 |   instance_type = "t2.micro"
10 | 
11 |   tags = {
12 |     Name = "HelloWorld"
13 |   }
14 | }
15 | 
16 | resource "aws_s3_bucket" "data_bucket" {
17 |   bucket = "my-tf-test-bucket"
18 | }
19 | 
20 | data "aws_ami" "ubuntu" {
21 |   most_recent = true
22 | 
23 |   filter {
24 |     name   = "name"
25 |     values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"]
26 |   }
27 | 
28 |   owners = ["099720109477"] # Canonical
29 | }
30 | 
31 | variable "server_port" {
32 |   description = "The port the server will use"
33 |   type        = number
34 |   default     = 8080
35 | }
36 | 
37 | output "instance_ip_addr" {
38 |   value = aws_instance.web_server.public_ip
39 | }
40 | 
41 | module "vpc" {
42 |   source = "terraform-aws-modules/vpc/aws"
43 |   version = "3.14.0"
44 | }
45 | 
46 | locals {
47 |   service_name = "my-app"
48 |   owner        = "user@example.com"
49 | }
50 | 
51 | terraform {
52 |   required_version = ">= 1.0"
53 |   required_providers {
54 |     aws = {
55 |       source  = "hashicorp/aws"
56 |       version = "~> 4.0"
57 |     }
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/golden_python.py:
--------------------------------------------------------------------------------
 1 | import asyncio  # Need this for the async function example
 2 | 
 3 | 
 4 | def top_level_function(arg1, arg2):
 5 |     """A regular function."""
 6 |     pass
 7 | 
 8 | 
 9 | class MyClass:
10 |     """A sample class."""
11 | 
12 |     def __init__(self, value):
13 |         self.value = value
14 | 
15 |     def method_one(self, param):
16 |         """A method within the class."""
17 |         return self.value + param
18 | 
19 | 
20 | async def async_function():
21 |     """An asynchronous function."""
22 |     await asyncio.sleep(1)
23 | 
24 | 
25 | # A top-level variable (currently not expected to be captured by basic query)
26 | CONSTANT_VALUE = 100
27 | 


--------------------------------------------------------------------------------
/tests/golden_python_complex.py:
--------------------------------------------------------------------------------
 1 | # More complex Python examples for symbol extraction
 2 | 
 3 | 
 4 | GLOBAL_VAR = 100
 5 | 
 6 | 
 7 | def decorator(func):
 8 |     def wrapper(*args, **kwargs):
 9 |         print("Before call")
10 |         result = func(*args, **kwargs)
11 |         print("After call")
12 |         return result
13 | 
14 |     return wrapper
15 | 
16 | 
17 | @decorator
18 | def decorated_function(x: int) -> int:
19 |     """A decorated function."""
20 |     return x * 2
21 | 
22 | 
23 | class OuterClass:
24 |     OUTER_CONST = "outer"
25 | 
26 |     def outer_method(self):
27 |         print("Outer method")
28 | 
29 |     class InnerClass:
30 |         INNER_CONST = "inner"
31 | 
32 |         def __init__(self, name: str):
33 |             self.name = name
34 | 
35 |         def inner_method(self):
36 |             print(f"Inner method called by {self.name}")
37 | 
38 |         @staticmethod
39 |         def static_inner():
40 |             print("Static inner method")
41 | 
42 |         def nested_function_in_method(self):
43 |             def deeply_nested():
44 |                 print("Deeply nested function")
45 | 
46 |             deeply_nested()
47 |             return "nested_func_ran"
48 | 
49 | 
50 | def generator_function(n):
51 |     """A generator function."""
52 |     i = 0
53 |     while i < n:
54 |         yield i
55 |         i += 1
56 | 
57 | 
58 | async def async_generator(n):
59 |     i = 0
60 |     while i < n:
61 |         yield i
62 |         i += 1
63 | 
64 | 
65 | def lambda_func(x, y):
66 |     return x + y
67 | 
68 | 
69 | # Top-level simple function again for baseline
70 | def another_top_level():
71 |     pass
72 | 


--------------------------------------------------------------------------------
/tests/golden_rust.rs:
--------------------------------------------------------------------------------
 1 | // Golden Rust file for symbol extraction tests
 2 | pub struct Foo {}
 3 | 
 4 | impl Foo {
 5 |     pub fn new() -> Self {
 6 |         Foo {}
 7 |     }
 8 |     pub fn bar(&self) {}
 9 | }
10 | 
11 | pub enum MyEnum {
12 |     A,
13 |     B,
14 | }
15 | 
16 | pub trait MyTrait {
17 |     fn do_it(&self);
18 | }
19 | 
20 | fn free_function() {}
21 | 


--------------------------------------------------------------------------------
/tests/golden_typescript.ts:
--------------------------------------------------------------------------------
 1 | // Golden TypeScript file for symbol extraction tests
 2 | export class MyClass {
 3 |     methodA(): void {}
 4 | }
 5 | 
 6 | export interface MyInterface {
 7 |     foo(): number;
 8 | }
 9 | 
10 | export enum MyEnum {
11 |     One,
12 |     Two,
13 | }
14 | 
15 | function helper() {}
16 | 


--------------------------------------------------------------------------------
/tests/golden_typescript_complex.ts:
--------------------------------------------------------------------------------
 1 | // More complex TypeScript examples for symbol extraction
 2 | 
 3 | import { type NextPage } from "next";
 4 | 
 5 | export const PI = 3.14159;
 6 | 
 7 | // Interface definition
 8 | export interface UserProfile {
 9 |     userId: string;
10 |     displayName: string;
11 |     email?: string; // Optional property
12 |     logActivity: (action: string) => void; // Function type property
13 | }
14 | 
15 | // Enum definition
16 | export enum Status {
17 |     Pending = 'PENDING',
18 |     Active = 'ACTIVE',
19 |     Inactive = 'INACTIVE',
20 | }
21 | 
22 | // Namespace
23 | namespace Utilities {
24 |     export function log(message: string): void {
25 |         console.log(`[UTIL]: ${message}`);
26 |     }
27 |     
28 |     export class StringHelper {
29 |         static capitalize(s: string): string {
30 |             return s.charAt(0).toUpperCase() + s.slice(1);
31 |         }
32 |     }
33 | }
34 | 
35 | // Generic Function
36 | function identity<T>(arg: T): T {
37 |     return arg;
38 | }
39 | 
40 | // Generic Class
41 | class GenericRepo<T> {
42 |     private items: T[] = [];
43 | 
44 |     add(item: T): void {
45 |         this.items.push(item);
46 |     }
47 | 
48 |     getAll(): T[] {
49 |         return this.items;
50 |     }
51 | }
52 | 
53 | // Arrow function assigned to a const
54 | export const addNumbers = (a: number, b: number): number => {
55 |     return a + b;
56 | };
57 | 
58 | // Decorated class (conceptual - requires decorator implementation)
59 | // function logged(constructor: Function) {
60 | //     console.log(`Class ${constructor.name} created`);
61 | // }
62 | 
63 | // @logged
64 | class DecoratedClass {
65 |     constructor(public name: string) {}
66 | 
67 |     greet() {
68 |         return `Hello, ${this.name}`;
69 |     }
70 | }
71 | 
72 | // Exported function
73 | export function calculateArea(width: number, height: number): number {
74 |     return width * height;
75 | }
76 | 
77 | // Async function
78 | async function fetchData(url: string): Promise<any> {
79 |     const response = await fetch(url);
80 |     if (!response.ok) {
81 |         throw new Error(`HTTP error! status: ${response.status}`);
82 |     }
83 |     return await response.json();
84 | }
85 | 
86 | // Simple class again for baseline
87 | class SimpleLogger {
88 |     log(message: string) {
89 |         console.log(message);
90 |     }
91 | }
92 | 


--------------------------------------------------------------------------------
/tests/integration/test_api_live.py:
--------------------------------------------------------------------------------
  1 | """Integration tests that run the FastAPI server with Uvicorn and hit real HTTP endpoints."""
  2 | 
  3 | import socket
  4 | import threading
  5 | import time
  6 | from pathlib import Path
  7 | 
  8 | import pytest
  9 | import requests
 10 | import uvicorn
 11 | 
 12 | SERVER_HOST = "127.0.0.1"
 13 | 
 14 | 
 15 | def _find_free_port() -> int:
 16 |     """Ask the OS for an unused port."""
 17 |     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
 18 |         s.bind((SERVER_HOST, 0))
 19 |         return s.getsockname()[1]
 20 | 
 21 | 
 22 | @pytest.fixture(scope="module")
 23 | def live_server():
 24 |     """Spin up Uvicorn in a background thread and yield the base URL."""
 25 |     port = _find_free_port()
 26 |     config = uvicorn.Config("kit.api.app:app", host=SERVER_HOST, port=port, log_level="error")
 27 |     server = uvicorn.Server(config)
 28 | 
 29 |     thread = threading.Thread(target=server.run, daemon=True)
 30 |     thread.start()
 31 | 
 32 |     # Wait until server is ready
 33 |     timeout = 10
 34 |     start = time.time()
 35 |     while not server.started and (time.time() - start) < timeout:
 36 |         time.sleep(0.1)
 37 |     if not server.started:
 38 |         raise RuntimeError("Uvicorn server failed to start within timeout")
 39 | 
 40 |     base_url = f"http://{SERVER_HOST}:{port}"
 41 |     try:
 42 |         yield base_url
 43 |     finally:
 44 |         server.should_exit = True
 45 |         thread.join(timeout=5)
 46 | 
 47 | 
 48 | @pytest.fixture(scope="module")
 49 | def realistic_repo_path() -> Path:
 50 |     return Path(__file__).parent.parent / "fixtures" / "realistic_repo"
 51 | 
 52 | 
 53 | # ---------------- Tests -----------------
 54 | 
 55 | 
 56 | def test_end_to_end_file_tree(live_server: str, realistic_repo_path: Path):
 57 |     # 1. Open repo
 58 |     resp = requests.post(f"{live_server}/repository", json={"path_or_url": str(realistic_repo_path)})
 59 |     assert resp.status_code == 201
 60 |     repo_id = resp.json()["id"]
 61 | 
 62 |     # 2. Get file tree
 63 |     tree_resp = requests.get(f"{live_server}/repository/{repo_id}/file-tree")
 64 |     assert tree_resp.status_code == 200
 65 |     tree = tree_resp.json()
 66 |     assert any(item["path"].endswith("models/user.py") for item in tree)
 67 | 
 68 | 
 69 | def test_get_file_content_live(live_server: str, realistic_repo_path: Path):
 70 |     resp = requests.post(f"{live_server}/repository", json={"path_or_url": str(realistic_repo_path)})
 71 |     repo_id = resp.json()["id"]
 72 | 
 73 |     file_rel = "models/user.py"
 74 |     content_resp = requests.get(f"{live_server}/repository/{repo_id}/files/{file_rel}")
 75 |     assert content_resp.status_code == 200
 76 |     assert "class User" in content_resp.text
 77 | 
 78 | 
 79 | def test_symbol_and_usage_live(live_server: str, realistic_repo_path: Path):
 80 |     repo_id = requests.post(f"{live_server}/repository", json={"path_or_url": str(realistic_repo_path)}).json()["id"]
 81 | 
 82 |     sym_resp = requests.get(f"{live_server}/repository/{repo_id}/symbols", params={"file_path": "services/auth.py"})
 83 |     assert sym_resp.status_code == 200
 84 |     symbols = sym_resp.json()
 85 |     assert any(s["name"] == "login" for s in symbols)
 86 | 
 87 |     usage_resp = requests.get(
 88 |         f"{live_server}/repository/{repo_id}/usages",
 89 |         params={"symbol_name": "login", "symbol_type": "function"},
 90 |     )
 91 |     assert usage_resp.status_code == 200
 92 |     usages = usage_resp.json()
 93 |     assert usages, "Expected at least one usage of 'login'"
 94 | 
 95 | 
 96 | def test_search_and_delete_live(live_server: str, realistic_repo_path: Path):
 97 |     repo_id = requests.post(f"{live_server}/repository", json={"path_or_url": str(realistic_repo_path)}).json()["id"]
 98 | 
 99 |     # search
100 |     s_resp = requests.get(f"{live_server}/repository/{repo_id}/search", params={"q": "def", "pattern": "*.py"})
101 |     assert s_resp.status_code == 200
102 |     assert isinstance(s_resp.json(), list)
103 | 
104 |     # index
105 |     idx = requests.get(f"{live_server}/repository/{repo_id}/index")
106 |     assert idx.status_code == 200
107 |     data = idx.json()
108 |     assert "files" in data and "symbols" in data
109 | 
110 |     # delete
111 |     del_resp = requests.delete(f"{live_server}/repository/{repo_id}")
112 |     assert del_resp.status_code == 204
113 | 
114 |     # subsequent request 404s
115 |     r404 = requests.get(f"{live_server}/repository/{repo_id}/file-tree")
116 |     assert r404.status_code == 404
117 | 


--------------------------------------------------------------------------------
/tests/mcp/test_call_tool_content.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from mcp.types import CallToolResult, EmbeddedResource, TextContent
 3 | 
 4 | # The server module provides a fallback alias called `ResourceContent`.  Depending on
 5 | # the MCP SDK version, this *may* be the same object as `EmbeddedResource`, or a
 6 | # stub when running under an older spec version.  Importing it must never fail.
 7 | from kit.mcp.server import GetFileContentParams, GetFileTreeParams, KitServerLogic, ResourceContent
 8 | 
 9 | 
10 | def _dummy_repo(tmp_path):
11 |     """Create a tiny git-repo-looking directory we can open with KitServerLogic."""
12 |     (tmp_path / "dummy.py").write_text("print('hello')\n", encoding="utf-8")
13 |     # A .git dir is *not* required for Repository – it only checks the path exists.
14 |     return str(tmp_path)
15 | 
16 | 
17 | @pytest.fixture()
18 | def logic(tmp_path):
19 |     """Yield a fresh KitServerLogic instance with one opened repo."""
20 |     server_logic = KitServerLogic()
21 |     repo_id = server_logic.open_repository(_dummy_repo(tmp_path))
22 |     return server_logic, repo_id
23 | 
24 | 
25 | def test_resourcecontent_alias():
26 |     """`ResourceContent` should successfully import and be a `BaseModel` subclass."""
27 |     # The alias can point to `EmbeddedResource` (new SDK) or a stub class (old SDK).
28 |     assert hasattr(ResourceContent, "model_validate"), "ResourceContent should be a pydantic model"
29 |     # If the current SDK exposes EmbeddedResource, the alias should be identical.
30 |     if "EmbeddedResource" in EmbeddedResource.__name__:
31 |         assert ResourceContent is EmbeddedResource
32 | 
33 | 
34 | def test_get_file_content_returns_textcontent(logic):
35 |     """The call-tool logic for **get_file_content** must return TextContent."""
36 |     server_logic, repo_id = logic
37 | 
38 |     # Replicate the input model used by the call-tool handler.
39 |     args = GetFileContentParams(repo_id=repo_id, file_path="dummy.py")
40 | 
41 |     # The real call-tool first validates the path – do the same here to ensure the
42 |     # helper doesn't raise.
43 |     server_logic.get_file_content(args.repo_id, args.file_path)
44 | 
45 |     result = [TextContent(type="text", text=f"/repos/{args.repo_id}/files/{args.file_path}")]
46 | 
47 |     # Attempt to build a CallToolResult – this is what the MCP framework will do
48 |     # internally.  If the content list contains the wrong type, Pydantic validation
49 |     # will fail, reproducing the original bug.
50 |     ctr = CallToolResult(content=result)
51 |     assert isinstance(ctr.content[0], TextContent)
52 | 
53 | 
54 | def test_get_file_tree_returns_textcontent(logic):
55 |     """The call-tool logic for **get_file_tree** must return TextContent."""
56 |     server_logic, repo_id = logic
57 | 
58 |     args = GetFileTreeParams(repo_id=repo_id)
59 |     server_logic.get_file_tree(args.repo_id)  # Should not raise
60 | 
61 |     result = [TextContent(type="text", text=f"/repos/{args.repo_id}/tree")]
62 |     ctr = CallToolResult(content=result)
63 |     assert isinstance(ctr.content[0], TextContent)
64 | 


--------------------------------------------------------------------------------
/tests/sample_code/c_sample.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | void greet(const char* name) {
4 |     printf("Hello %s\n", name);
5 | } 


--------------------------------------------------------------------------------
/tests/sample_code/go_sample.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import "fmt"
 4 | 
 5 | func Greet(name string) string {
 6 |     return fmt.Sprintf("Hello %s", name)
 7 | }
 8 | 
 9 | type Greeter struct {
10 |     Name string
11 | }
12 | 
13 | func (g Greeter) Greet() string {
14 |     return fmt.Sprintf("Hello %s", g.Name)
15 | } 


--------------------------------------------------------------------------------
/tests/sample_code/hcl_sample.hcl:
--------------------------------------------------------------------------------
1 | variable "region" {
2 |   description = "AWS region"
3 |   type        = string
4 |   default     = "us-west-2"
5 | } 


--------------------------------------------------------------------------------
/tests/sample_code/java_sample.java:
--------------------------------------------------------------------------------
 1 | public class Greeter {
 2 |     private final String name;
 3 | 
 4 |     public Greeter(String name) {
 5 |         this.name = name;
 6 |     }
 7 | 
 8 |     public String greet() {
 9 |         return "Hello " + name;
10 |     }
11 | } 


--------------------------------------------------------------------------------
/tests/sample_code/javascript_sample.js:
--------------------------------------------------------------------------------
 1 | export function greet(name) {
 2 |   return `Hello ${name}`;
 3 | }
 4 | 
 5 | export class Greeter {
 6 |   constructor(name) {
 7 |     this.name = name;
 8 |   }
 9 | 
10 |   greet() {
11 |     return `Hello ${this.name}`;
12 |   }
13 | } 


--------------------------------------------------------------------------------
/tests/sample_code/python_sample.py:
--------------------------------------------------------------------------------
 1 | def greet(name: str) -> str:
 2 |     return f"Hello {name}"
 3 | 
 4 | 
 5 | class Greeter:
 6 |     def __init__(self, name: str):
 7 |         self.name = name
 8 | 
 9 |     def greet(self) -> str:
10 |         return f"Hello {self.name}"
11 | 


--------------------------------------------------------------------------------
/tests/sample_code/ruby_sample.rb:
--------------------------------------------------------------------------------
 1 | def greet(name)
 2 |   "Hello #{name}"
 3 | end
 4 | 
 5 | class Greeter
 6 |   def initialize(name)
 7 |     @name = name
 8 |   end
 9 | 
10 |   def greet
11 |     "Hello #{@name}"
12 |   end
13 | end 


--------------------------------------------------------------------------------
/tests/sample_code/rust_sample.rs:
--------------------------------------------------------------------------------
 1 | pub fn greet(name: &str) -> String {
 2 |     format!("Hello {}", name)
 3 | }
 4 | 
 5 | pub struct Greeter<'a> {
 6 |     pub name: &'a str,
 7 | }
 8 | 
 9 | impl<'a> Greeter<'a> {
10 |     pub fn greet(&self) -> String {
11 |         format!("Hello {}", self.name)
12 |     }
13 | } 


--------------------------------------------------------------------------------
/tests/sample_code/tf_sample.tf:
--------------------------------------------------------------------------------
1 | resource "aws_s3_bucket" "bucket" {
2 |   bucket = "my-bucket"
3 | } 


--------------------------------------------------------------------------------
/tests/sample_code/tsx_sample.tsx:
--------------------------------------------------------------------------------
1 | // @ts-nocheck
2 | 
3 | import React from 'react';
4 | 
5 | export function MyComponent() {
6 |   return <div>Hello</div>;
7 | } 


--------------------------------------------------------------------------------
/tests/sample_code/typescript_sample.ts:
--------------------------------------------------------------------------------
 1 | export function greet(name: string): string {
 2 |   return `Hello ${name}`;
 3 | }
 4 | 
 5 | export class Greeter {
 6 |   constructor(private name: string) {}
 7 | 
 8 |   greet(): string {
 9 |     return `Hello ${this.name}`;
10 |   }
11 | } 


--------------------------------------------------------------------------------
/tests/test_code_searcher.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | 
 4 | from kit import CodeSearcher
 5 | 
 6 | 
 7 | def test_search_text_basic():
 8 |     with tempfile.TemporaryDirectory() as tmpdir:
 9 |         pyfile = os.path.join(tmpdir, "foo.py")
10 |         with open(pyfile, "w") as f:
11 |             f.write("""
12 | def foo(): pass
13 | 
14 | def bar(): pass
15 | """)
16 |         searcher = CodeSearcher(tmpdir)
17 |         matches = searcher.search_text("def foo")
18 |         assert any("foo" in m["line"] for m in matches)
19 |         matches_bar = searcher.search_text("bar")
20 |         assert any("bar" in m["line"] for m in matches_bar)
21 | 
22 | 
23 | def test_search_text_multiple_files():
24 |     with tempfile.TemporaryDirectory() as tmpdir:
25 |         files = ["a.py", "b.py", "c.txt"]
26 |         for fname in files:
27 |             with open(os.path.join(tmpdir, fname), "w") as f:
28 |                 f.write(f"def {fname[:-3]}(): pass\n")
29 |         searcher = CodeSearcher(tmpdir)
30 |         matches = searcher.search_text("def ", file_pattern="*.py")
31 |         assert len(matches) == 2
32 |         assert all(m["file"].endswith(".py") for m in matches)
33 | 
34 | 
35 | def test_search_text_regex():
36 |     with tempfile.TemporaryDirectory() as tmpdir:
37 |         pyfile = os.path.join(tmpdir, "foo.py")
38 |         with open(pyfile, "w") as f:
39 |             f.write("def foo(): pass\ndef bar(): pass\n")
40 |         searcher = CodeSearcher(tmpdir)
41 |         matches = searcher.search_text(r"def [fb]oo")
42 |         assert any("foo" in m["line"] for m in matches)
43 |         assert not any("bar" in m["line"] for m in matches)
44 | 


--------------------------------------------------------------------------------
/tests/test_context_assembler.py:
--------------------------------------------------------------------------------
 1 | from kit import ContextAssembler, Repository
 2 | 
 3 | 
 4 | def test_context_assembler_basic(tmp_path):
 5 |     # Create a simple repo with one file
 6 |     file_path = tmp_path / "foo.py"
 7 |     file_path.write_text("print('hi')\n")
 8 | 
 9 |     repo = Repository(str(tmp_path))
10 |     assembler = ContextAssembler(repo)
11 |     assembler.add_file("foo.py")
12 |     ctx = assembler.format_context()
13 | 
14 |     assert "foo.py" in ctx
15 |     assert "print('hi')" in ctx
16 | 


--------------------------------------------------------------------------------
/tests/test_context_assembler_limits.py:
--------------------------------------------------------------------------------
 1 | from kit import Repository
 2 | 
 3 | 
 4 | def make_repo(tmp_path):
 5 |     repo_root = tmp_path / "repo"
 6 |     repo_root.mkdir()
 7 |     return repo_root
 8 | 
 9 | 
10 | def test_skip_by_filename(tmp_path):
11 |     repo_root = make_repo(tmp_path)
12 |     (repo_root / "package-lock.json").write_text("{}\n")
13 | 
14 |     repo = Repository(str(repo_root))
15 |     assembler = repo.get_context_assembler()
16 | 
17 |     assembler.add_file(
18 |         "package-lock.json",
19 |         skip_if_name_in=["package-lock.json"],
20 |     )
21 | 
22 |     ctx = assembler.format_context()
23 |     # lock file should be skipped -> context empty
24 |     assert "package-lock.json" not in ctx
25 | 
26 | 
27 | def test_skip_by_max_lines(tmp_path):
28 |     repo_root = make_repo(tmp_path)
29 |     big_file = repo_root / "big.py"
30 |     big_file.write_text("\n".join(["print('x')"] * 500))
31 | 
32 |     repo = Repository(str(repo_root))
33 |     assembler = repo.get_context_assembler()
34 | 
35 |     assembler.add_file("big.py", max_lines=100)
36 |     ctx = assembler.format_context()
37 |     assert "big.py" not in ctx
38 | 
39 | 
40 | def test_include_small_file(tmp_path):
41 |     repo_root = make_repo(tmp_path)
42 |     small = repo_root / "small.py"
43 |     small.write_text("print('hi')\n")
44 | 
45 |     repo = Repository(str(repo_root))
46 |     assembler = repo.get_context_assembler()
47 |     assembler.add_file("small.py", max_lines=100)
48 | 
49 |     ctx = assembler.format_context()
50 |     assert "small.py" in ctx
51 | 


--------------------------------------------------------------------------------
/tests/test_cross_file_impact.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import tempfile
 4 | 
 5 | from kit import Repository
 6 | 
 7 | TEST_FILES = {
 8 |     "a.py": """
 9 | def foo():
10 |     pass
11 | 
12 | def bar():
13 |     foo()
14 | """,
15 |     "b.py": """
16 | from a import foo
17 | 
18 | def baz():
19 |     foo()
20 | """,
21 | }
22 | 
23 | 
24 | def setup_test_repo():
25 |     tmpdir = tempfile.mkdtemp()
26 |     for fname, content in TEST_FILES.items():
27 |         with open(os.path.join(tmpdir, fname), "w") as f:
28 |             f.write(content)
29 |     return tmpdir
30 | 
31 | 
32 | def test_find_symbol_usages():
33 |     repo_dir = setup_test_repo()
34 |     try:
35 |         repository = Repository(repo_dir)
36 |         usages = repository.find_symbol_usages("foo", symbol_type="function")
37 |         usage_files = sorted(set(u["file"].split(os.sep)[-1] for u in usages))
38 |         assert "a.py" in usage_files
39 |         assert "b.py" in usage_files
40 |         # Should find both the definition and calls/imports
41 |         found_types = set(u.get("type") for u in usages if u.get("type"))
42 |         assert "function" in found_types
43 |         # Should find at least one usage with context containing 'foo()'
44 |         assert any("foo()" in (u.get("context") or "") for u in usages)
45 |     finally:
46 |         shutil.rmtree(repo_dir)
47 | 


--------------------------------------------------------------------------------
/tests/test_docstring_incremental.py:
--------------------------------------------------------------------------------
 1 | """Failing tests specifying desired incremental behaviour for DocstringIndexer."""
 2 | 
 3 | import hashlib
 4 | import shutil
 5 | from pathlib import Path
 6 | from unittest.mock import MagicMock
 7 | 
 8 | import pytest
 9 | 
10 | from kit import DocstringIndexer, Repository
11 | from kit.vector_searcher import VectorDBBackend
12 | 
13 | FIXTURE_REPO = Path(__file__).parent / "fixtures" / "realistic_repo"
14 | 
15 | 
16 | class DummyBackend(VectorDBBackend):
17 |     """Minimal in-memory backend with delete support for tests."""
18 | 
19 |     def __init__(self):
20 |         self.embeddings = []
21 |         self.metadatas = []
22 |         self.ids = []
23 | 
24 |     def add(self, embeddings, metadatas, ids=None):
25 |         self.embeddings.extend(embeddings)
26 |         self.metadatas.extend(metadatas)
27 |         self.ids.extend(ids or [str(i) for i in range(len(metadatas))])
28 | 
29 |     def query(self, embedding, top_k):
30 |         return self.metadatas[:top_k]
31 | 
32 |     def persist(self):
33 |         pass
34 | 
35 |     def count(self):
36 |         return len(self.metadatas)
37 | 
38 |     def delete(self, ids):
39 |         for _id in ids:
40 |             if _id in self.ids:
41 |                 idx = self.ids.index(_id)
42 |                 self.ids.pop(idx)
43 |                 self.embeddings.pop(idx)
44 |                 self.metadatas.pop(idx)
45 | 
46 | 
47 | @pytest.fixture(scope="function")
48 | def realistic_repo(tmp_path):
49 |     # Copy fixture repo to tmp so we can mutate files safely
50 |     workdir = tmp_path / "repo"
51 |     shutil.copytree(FIXTURE_REPO, workdir)
52 |     return Repository(str(workdir))
53 | 
54 | 
55 | def _hash_file(path: Path) -> str:
56 |     return hashlib.sha1(path.read_bytes()).hexdigest()
57 | 
58 | 
59 | def test_incremental_indexing(realistic_repo):
60 |     """Initial build -> modify one file -> rebuild should only upsert that file's symbols."""
61 | 
62 |     summarizer = MagicMock()
63 |     summarizer.summarize_function.side_effect = lambda p, s: f"F-{s}"
64 |     summarizer.summarize_class.side_effect = lambda p, s: f"C-{s}"
65 | 
66 |     def embed_fn(t):
67 |         return [float(len(t))]
68 | 
69 |     backend = DummyBackend()
70 |     indexer = DocstringIndexer(realistic_repo, summarizer, embed_fn, backend=backend)
71 | 
72 |     # 1. initial build
73 |     indexer.build(level="symbol", force=True)
74 |     initial_count = backend.count()
75 | 
76 |     # 2. mutate utils.py (append a comment)
77 |     utils_file = Path(realistic_repo.repo_path) / "utils.py"
78 |     utils_file.write_text(utils_file.read_text() + "\n# change\n")
79 | 
80 |     indexer.build(level="symbol")  # incremental
81 | 
82 |     # summarizer should have been called for symbols in utils.py only
83 |     assert summarizer.summarize_function.call_count > 0
84 |     # naive check: count unchanged (upsert not duplicate)
85 |     assert backend.count() == initial_count
86 | 
87 |     # 3. delete models/user.py -> rebuild
88 |     user_file = Path(realistic_repo.repo_path) / "models" / "user.py"
89 |     user_file.unlink()
90 | 
91 |     indexer.build(level="symbol")
92 | 
93 |     # count should now be reduced (symbols from user.py removed)
94 |     assert backend.count() < initial_count
95 | 


--------------------------------------------------------------------------------
/tests/test_hcl_symbols.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tempfile
  3 | 
  4 | from kit import Repository
  5 | 
  6 | 
  7 | def test_hcl_symbol_extraction():
  8 |     hcl_content = """
  9 | provider "aws" {
 10 |   region = "us-west-2"
 11 | }
 12 | 
 13 | resource "aws_instance" "web" {
 14 |   ami           = "ami-0c55b159cbfa1f0"
 15 |   instance_type = "t2.micro"
 16 |   tags = {
 17 |     Name = "WebServer"
 18 |   }
 19 | }
 20 | 
 21 | resource "aws_s3_bucket" "bucket" {
 22 |   bucket = "my-example-bucket-123456"
 23 |   acl    = "private"
 24 | }
 25 | 
 26 | variable "instance_count" {
 27 |   description = "Number of EC2 instances to launch"
 28 |   type        = number
 29 |   default     = 2
 30 | }
 31 | 
 32 | output "instance_id" {
 33 |   value = aws_instance.web.id
 34 | }
 35 | 
 36 | locals {
 37 |   environment = "dev"
 38 |   owner       = "test-user"
 39 | }
 40 | 
 41 | module "vpc" {
 42 |   source = "terraform-aws-modules/vpc/aws"
 43 |   name   = "example-vpc"
 44 |   cidr   = "10.0.0.0/16"
 45 | }
 46 | """
 47 |     with tempfile.TemporaryDirectory() as tmpdir:
 48 |         hcl_path = os.path.join(tmpdir, "main.tf")
 49 |         with open(hcl_path, "w") as f:
 50 |             f.write(hcl_content)
 51 |         repository = Repository(tmpdir)
 52 |         symbols = repository.extract_symbols("main.tf")
 53 |         types = {s["type"] for s in symbols}
 54 |         names = {s["name"] for s in symbols if "name" in s}
 55 | 
 56 |         # Expected symbols based on HCL query and updated extractor logic
 57 |         expected = {
 58 |             "aws",  # provider "aws"
 59 |             "aws_instance.web",  # resource "aws_instance" "web"
 60 |             "aws_s3_bucket.bucket",  # resource "aws_s3_bucket" "bucket"
 61 |             "instance_count",  # variable "instance_count"
 62 |             "instance_id",  # output "instance_id"
 63 |             "vpc",  # module "vpc"
 64 |             "locals",  # locals block
 65 |             # Note: no terraform block in this fixture
 66 |         }
 67 | 
 68 |         # Assert individual expected symbols exist
 69 |         for name in expected:
 70 |             assert name in names, f"Expected name {name} not found in {names}"
 71 | 
 72 |         # Check types for resource blocks (should be unquoted resource type)
 73 |         resource_types = {s["subtype"] for s in symbols if s["type"] == "resource" and "subtype" in s}
 74 |         assert "aws_instance" in resource_types
 75 |         assert "aws_s3_bucket" in resource_types
 76 | 
 77 |         # Check for provider and locals types
 78 |         assert "provider" in types
 79 |         assert "locals" in types
 80 | 
 81 | 
 82 | def test_hcl_symbol_edge_cases():
 83 |     hcl_content = """
 84 | resource "aws_security_group" "sg" {
 85 |   name        = "allow_tls"
 86 |   description = "Allow TLS inbound traffic"
 87 | }
 88 | 
 89 | resource "aws_lb_listener" "listener" {
 90 |   port     = 443
 91 |   protocol = "HTTPS"
 92 | }
 93 | 
 94 | terraform {
 95 |   required_providers {
 96 |     aws = {
 97 |       source  = "hashicorp/aws"
 98 |       version = "~> 2.0"
 99 |     }
100 |   }
101 | }
102 | """
103 |     with tempfile.TemporaryDirectory() as tmpdir:
104 |         hcl_path = os.path.join(tmpdir, "main.tf")
105 |         with open(hcl_path, "w") as f:
106 |             f.write(hcl_content)
107 |         repository = Repository(tmpdir)
108 |         symbols = repository.extract_symbols("main.tf")
109 |         types = {s["type"] for s in symbols}
110 |         subtypes = {s["subtype"] for s in symbols if "subtype" in s}
111 |         {s["name"] for s in symbols if "name" in s}
112 |         # Should include the unnamed terraform block
113 |         assert "terraform" in types or "block" in types
114 |         # Should include specific resource subtypes (unquoted)
115 |         assert "aws_security_group" in subtypes
116 |         assert "aws_lb_listener" in subtypes
117 | 


--------------------------------------------------------------------------------
/tests/test_java_symbols.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | 
 4 | from kit import Repository
 5 | 
 6 | 
 7 | def _extract(tmpdir: str, filename: str, content: str):
 8 |     path = os.path.join(tmpdir, filename)
 9 |     with open(path, "w") as f:
10 |         f.write(content)
11 |     return Repository(tmpdir).extract_symbols(filename)
12 | 
13 | 
14 | def test_java_symbols():
15 |     code = """
16 | public class Foo {
17 |     public int x;
18 |     public Foo() {}
19 |     public void bar() {}
20 | }
21 | 
22 | interface Baz {}
23 | 
24 | enum Color { RED, GREEN }
25 | """
26 |     with tempfile.TemporaryDirectory() as tmpdir:
27 |         symbols = _extract(tmpdir, "Foo.java", code)
28 |         names = {s["name"] for s in symbols}
29 |         assert {"Foo", "bar", "Baz", "Color"}.issubset(names)
30 | 


--------------------------------------------------------------------------------
/tests/test_line_ref_fix.py:
--------------------------------------------------------------------------------
 1 | from kit.pr_review.line_ref_fixer import LineRefFixer
 2 | 
 3 | SIMPLE_DIFF = """diff --git a/foo.py b/foo.py
 4 | @@ -10,3 +10,4 @@ def func():
 5 |      a = 1
 6 | -    b = 2
 7 | +    b = 3
 8 | +    c = 4
 9 | """
10 | 
11 | BAD_COMMENT = "Issue at foo.py:10 is wrong. Another range foo.py:10-11 is wrong too."
12 | 
13 | 
14 | def test_line_ref_fix_simple():
15 |     fixed, fixes = LineRefFixer.fix_comment(BAD_COMMENT, SIMPLE_DIFF)
16 | 
17 |     # Both referenced lines 10 and 10-11 are now legal; fixer should make no changes
18 |     assert fixed == BAD_COMMENT
19 |     assert fixes == []
20 | 


--------------------------------------------------------------------------------
/tests/test_registry_deterministic.py:
--------------------------------------------------------------------------------
  1 | """Tests for deterministic ID generation in registry with ref parameters."""
  2 | 
  3 | import tempfile
  4 | from pathlib import Path
  5 | 
  6 | from src.kit.api.registry import PersistentRepoRegistry, _canonical, path_to_id
  7 | 
  8 | 
  9 | class TestRegistryDeterministicIDs:
 10 |     """Test that registry generates deterministic IDs based on path+ref combinations."""
 11 | 
 12 |     def test_same_path_ref_same_id(self):
 13 |         """Test that same path+ref combination always returns same ID."""
 14 |         registry = PersistentRepoRegistry()
 15 | 
 16 |         # Add same path+ref multiple times
 17 |         id1 = registry.add(".", "main")
 18 |         id2 = registry.add(".", "main")
 19 |         id3 = registry.add(".", "main")
 20 | 
 21 |         # Should all be the same
 22 |         assert id1 == id2 == id3
 23 | 
 24 |     def test_different_refs_different_ids(self):
 25 |         """Test that different refs for same path return different IDs."""
 26 |         registry = PersistentRepoRegistry()
 27 | 
 28 |         # Same path, different refs
 29 |         id_main = registry.add(".", "main")
 30 |         id_tag = registry.add(".", "v1.0.0")
 31 |         id_commit = registry.add(".", "abc123def456")
 32 |         id_none = registry.add(".")
 33 | 
 34 |         # All should be different
 35 |         ids = [id_main, id_tag, id_commit, id_none]
 36 |         assert len(set(ids)) == len(ids), f"Expected all different IDs, got: {ids}"
 37 | 
 38 |     def test_canonical_path_includes_ref(self):
 39 |         """Test that _canonical function properly includes ref parameter."""
 40 |         path = "."
 41 | 
 42 |         canon_main = _canonical(path, "main")
 43 |         canon_tag = _canonical(path, "v1.0.0")
 44 |         canon_none = _canonical(path, None)
 45 | 
 46 |         # Should include ref in canonical representation
 47 |         assert "@main" in canon_main
 48 |         assert "@v1.0.0" in canon_tag
 49 |         assert canon_main != canon_tag
 50 |         assert canon_main != canon_none
 51 | 
 52 |     def test_path_to_id_deterministic(self):
 53 |         """Test that path_to_id function is deterministic."""
 54 |         canon1 = _canonical(".", "main")
 55 |         canon2 = _canonical(".", "main")
 56 |         canon3 = _canonical(".", "v1.0.0")
 57 | 
 58 |         id1 = path_to_id(canon1)
 59 |         id2 = path_to_id(canon2)
 60 |         id3 = path_to_id(canon3)
 61 | 
 62 |         # Same canonical path should give same ID
 63 |         assert id1 == id2
 64 |         # Different canonical path should give different ID
 65 |         assert id1 != id3
 66 | 
 67 |     def test_remote_url_with_ref(self):
 68 |         """Test deterministic IDs for remote URLs with refs."""
 69 |         registry = PersistentRepoRegistry()
 70 | 
 71 |         url = "https://github.com/owner/repo"
 72 | 
 73 |         id1 = registry.add(url, "main")
 74 |         id2 = registry.add(url, "main")
 75 |         id3 = registry.add(url, "v1.0.0")
 76 |         id4 = registry.add(url)  # No ref
 77 | 
 78 |         # Same URL+ref should be same
 79 |         assert id1 == id2
 80 |         # Different refs should be different
 81 |         assert id1 != id3
 82 |         assert id1 != id4
 83 |         assert id3 != id4
 84 | 
 85 |     def test_canonical_remote_url_format(self):
 86 |         """Test canonical format for remote URLs."""
 87 |         url = "https://github.com/owner/repo"
 88 | 
 89 |         canon_main = _canonical(url, "main")
 90 |         canon_none = _canonical(url, None)
 91 | 
 92 |         # Should include ref for remote URLs
 93 |         assert canon_main.endswith("@main")
 94 |         assert canon_none.endswith("@HEAD")  # Default for remote URLs
 95 | 
 96 |     def test_local_path_resolution(self):
 97 |         """Test that local paths are resolved consistently."""
 98 |         with tempfile.TemporaryDirectory() as temp_dir:
 99 |             # Create test file
100 |             test_file = Path(temp_dir) / "test.py"
101 |             test_file.write_text("def hello(): pass")
102 | 
103 |             # Test relative vs absolute paths
104 |             canon1 = _canonical(temp_dir, "main")
105 |             canon2 = _canonical(str(Path(temp_dir).resolve()), "main")
106 | 
107 |             # Should resolve to same canonical representation
108 |             id1 = path_to_id(canon1)
109 |             id2 = path_to_id(canon2)
110 |             assert id1 == id2
111 | 


--------------------------------------------------------------------------------
/tests/test_repo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tempfile
  3 | 
  4 | import pytest
  5 | 
  6 | from kit import Repository
  7 | 
  8 | 
  9 | def test_repo_get_file_tree_and_symbols():
 10 |     with tempfile.TemporaryDirectory() as tmpdir:
 11 |         os.makedirs(f"{tmpdir}/foo/bar")
 12 |         with open(f"{tmpdir}/foo/bar/baz.py", "w") as f:
 13 |             f.write("""
 14 | class Foo:
 15 |     def bar(self): pass
 16 | 
 17 | def baz(): pass
 18 | """)
 19 |         repository = Repository(tmpdir)
 20 |         tree = repository.get_file_tree()
 21 |         assert any(item["path"].endswith("baz.py") for item in tree)
 22 |         assert any(item["is_dir"] and item["path"].endswith("foo/bar") for item in tree)
 23 |         symbols = repository.extract_symbols("foo/bar/baz.py")
 24 |         names = {s["name"] for s in symbols}
 25 |         assert "Foo" in names
 26 |         assert "baz" in names
 27 |         types = {s["type"] for s in symbols}
 28 |         assert "class" in types
 29 |         assert "function" in types
 30 | 
 31 | 
 32 | @pytest.mark.parametrize(
 33 |     ("structure", "expected_treepaths"),
 34 |     [
 35 |         (
 36 |             {
 37 |                 "a.py": "pass",
 38 |                 "b.py": "pass",
 39 |                 "c.txt": "test",
 40 |             },
 41 |             [
 42 |                 "a.py",
 43 |                 "b.py",
 44 |                 "c.txt",
 45 |             ],
 46 |         ),
 47 |         (
 48 |             {
 49 |                 "dir1/x.py": "pass",
 50 |                 "dir2/y.py": "pass",
 51 |             },
 52 |             [
 53 |                 "dir1",
 54 |                 "dir1/x.py",
 55 |                 "dir2",
 56 |                 "dir2/y.py",
 57 |             ],
 58 |         ),
 59 |         (
 60 |             {
 61 |                 "dir1/dir2/z.py": "pass",
 62 |             },
 63 |             [
 64 |                 "dir1",
 65 |                 "dir1/dir2",
 66 |                 "dir1/dir2/z.py",
 67 |             ],
 68 |         ),
 69 |         (
 70 |             {
 71 |                 ".gitignore": "dir1/a.py",
 72 |                 "dir1/a.py": "pass",
 73 |                 "dir2/b.py": "pass",
 74 |             },
 75 |             [
 76 |                 ".gitignore",
 77 |                 "dir2",
 78 |                 "dir2/b.py",
 79 |             ],
 80 |         ),
 81 |     ],
 82 | )
 83 | def test_repo_file_tree_various(structure, expected_treepaths):
 84 |     with tempfile.TemporaryDirectory() as tmpdir:
 85 |         for relpath, contents in structure.items():
 86 |             path = os.path.join(tmpdir, relpath)
 87 |             os.makedirs(os.path.dirname(path), exist_ok=True)
 88 |             with open(path, "w") as f:
 89 |                 f.write(contents)
 90 | 
 91 |         repository = Repository(tmpdir)
 92 |         tree = repository.get_file_tree()
 93 |         actual_treepaths = [x["path"] for x in tree]
 94 |         assert sorted(actual_treepaths) == sorted(expected_treepaths)
 95 | 
 96 | 
 97 | def test_repo_get_file_content():
 98 |     with tempfile.TemporaryDirectory() as tmpdir:
 99 |         # Setup: Create some test files
100 |         content1 = "Hello, world!\nThis is a test file."
101 |         file1_path = "dir1/file1.txt"
102 |         full_file1_path = os.path.join(tmpdir, file1_path)
103 |         os.makedirs(os.path.dirname(full_file1_path), exist_ok=True)
104 |         with open(full_file1_path, "w") as f:
105 |             f.write(content1)
106 | 
107 |         empty_file_path = "empty.txt"
108 |         full_empty_file_path = os.path.join(tmpdir, empty_file_path)
109 |         with open(full_empty_file_path, "w") as f:
110 |             pass  # Create an empty file
111 | 
112 |         repository = Repository(tmpdir)
113 | 
114 |         # Test 1: Read content from an existing file
115 |         retrieved_content1 = repository.get_file_content(file1_path)
116 |         assert retrieved_content1 == content1
117 | 
118 |         # Test 2: Read content from an empty file
119 |         retrieved_empty_content = repository.get_file_content(empty_file_path)
120 |         assert retrieved_empty_content == ""
121 | 
122 |         # Test 3: Attempt to read content from a non-existent file
123 |         non_existent_file_path = "non_existent.txt"
124 |         with pytest.raises(FileNotFoundError):
125 |             repository.get_file_content(non_existent_file_path)
126 | 
127 |         # Test 4: Attempt to read content from a directory (should also fail)
128 |         with pytest.raises(IOError):  # Or perhaps FileNotFoundError or IsADirectoryError, adjust as per actual behavior
129 |             repository.get_file_content("dir1")
130 | 


--------------------------------------------------------------------------------
/tests/test_repo_integration.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | 
 3 | from kit import Repository
 4 | 
 5 | 
 6 | def test_repo_index_and_chunking():
 7 |     with tempfile.TemporaryDirectory() as tmpdir:
 8 |         with open(f"{tmpdir}/a.py", "w") as f:
 9 |             f.write("""def foo(): pass\nclass Bar: pass\n""")
10 |         repository = Repository(tmpdir)
11 |         idx = repository.index()
12 |         assert "file_tree" in idx and "symbols" in idx
13 |         assert any("a.py" in f for f in idx["symbols"])
14 |         lines = repository.chunk_file_by_lines("a.py", max_lines=1)
15 |         assert len(lines) > 1
16 |         syms = repository.chunk_file_by_symbols("a.py")
17 |         names = {s["name"] for s in syms}
18 |         assert "foo" in names
19 |         assert "Bar" in names
20 |         ctx = repository.extract_context_around_line("a.py", 1)
21 |         assert ctx is not None
22 | 


--------------------------------------------------------------------------------
/tests/test_repo_mapper.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | 
 3 | from kit import RepoMapper
 4 | 
 5 | 
 6 | def test_get_file_tree():
 7 |     with tempfile.TemporaryDirectory() as tmpdir:
 8 |         # Create some files and dirs
 9 |         import os
10 | 
11 |         os.makedirs(f"{tmpdir}/foo/bar")
12 |         with open(f"{tmpdir}/foo/bar/baz.py", "w") as f:
13 |             f.write("def test(): pass\n")
14 |         mapper = RepoMapper(tmpdir)
15 |         tree = mapper.get_file_tree()
16 |         assert any(item["path"].endswith("baz.py") for item in tree)
17 |         assert any(item["is_dir"] and item["path"].endswith("foo/bar") for item in tree)
18 | 
19 | 
20 | def test_extract_symbols():
21 |     with tempfile.TemporaryDirectory() as tmpdir:
22 |         pyfile = f"{tmpdir}/a.py"
23 |         with open(pyfile, "w") as f:
24 |             f.write("""
25 | class Foo:
26 |     def bar(self): pass
27 | 
28 | def baz(): pass
29 | """)
30 |         mapper = RepoMapper(tmpdir)
31 |         symbols = mapper.extract_symbols("a.py")
32 |         names = {s["name"] for s in symbols}
33 |         assert "Foo" in names
34 |         assert "baz" in names
35 |         types = {s["type"] for s in symbols}
36 |         assert "class" in types
37 |         assert "function" in types
38 | 


--------------------------------------------------------------------------------
/tests/test_resource_loading.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | import unittest
 4 | from pathlib import Path
 5 | 
 6 | from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor
 7 | 
 8 | 
 9 | class ResourceLoadingTest(unittest.TestCase):
10 |     """Tests that verify symbol extraction works outside the kit repository directory."""
11 | 
12 |     def test_extraction_from_different_working_directory(self):
13 |         """Verify that symbol extraction works when run from a different working directory."""
14 |         # Save current working directory
15 |         original_cwd = os.getcwd()
16 | 
17 |         try:
18 |             # Create and change to a temporary directory completely outside the repo
19 |             with tempfile.TemporaryDirectory() as temp_dir:
20 |                 os.chdir(temp_dir)
21 | 
22 |                 # Confirm we're outside the kit repository
23 |                 kit_repo_path = Path(original_cwd)
24 |                 current_path = Path(os.getcwd())
25 |                 self.assertNotEqual(kit_repo_path, current_path)
26 |                 self.assertFalse(current_path.is_relative_to(kit_repo_path))
27 | 
28 |                 # Now try to extract symbols - this would fail if the code relies on repo-relative paths
29 |                 python_code = "def test_function():\n    pass\n\nclass TestClass:\n    def method(self):\n        pass"
30 |                 symbols = TreeSitterSymbolExtractor.extract_symbols(".py", python_code)
31 | 
32 |                 # Verify extraction worked
33 |                 self.assertGreater(len(symbols), 0, "Should extract at least one symbol")
34 |                 symbol_names = {s["name"] for s in symbols}
35 |                 self.assertIn("test_function", symbol_names)
36 |                 self.assertIn("TestClass", symbol_names)
37 | 
38 |                 # Optional: Test other languages too
39 |                 js_code = "function foo() {}\nclass Bar {}"
40 |                 js_symbols = TreeSitterSymbolExtractor.extract_symbols(".js", js_code)
41 |                 self.assertGreater(len(js_symbols), 0, "Should extract JavaScript symbols")
42 | 
43 |         finally:
44 |             # Restore original working directory
45 |             os.chdir(original_cwd)
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     unittest.main()
50 | 


--------------------------------------------------------------------------------
/tests/test_ruby_c_symbols.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Tests for Ruby and C symbol extraction."""
 3 | 
 4 | import os
 5 | import tempfile
 6 | 
 7 | from kit import Repository
 8 | 
 9 | 
10 | def _extract(tmpdir: str, filename: str, content: str):
11 |     path = os.path.join(tmpdir, filename)
12 |     with open(path, "w", encoding="utf-8") as f:
13 |         f.write(content)
14 |     return Repository(tmpdir).extract_symbols(filename)
15 | 
16 | 
17 | def test_ruby_symbols():
18 |     code = """
19 | class Foo; end
20 | module Bar; end
21 | 
22 | def baz; end
23 | class Foo
24 |   def qux; end
25 | end
26 | """
27 |     with tempfile.TemporaryDirectory() as tmpdir:
28 |         symbols = _extract(tmpdir, "main.rb", code)
29 |         names = {s["name"] for s in symbols}
30 |         assert {"Foo", "Bar", "baz", "qux"}.issubset(names)
31 | 
32 | 
33 | def test_c_symbols():
34 |     code = """
35 | struct Person { int age; };
36 | 
37 | enum Color { RED, GREEN };
38 | 
39 | int add(int a,int b){ return a+b; }
40 | """
41 |     with tempfile.TemporaryDirectory() as tmpdir:
42 |         symbols = _extract(tmpdir, "main.c", code)
43 |         names = {s["name"] for s in symbols}
44 |         assert {"Person", "Color", "add"}.issubset(names)
45 | 


--------------------------------------------------------------------------------
/tests/test_sample_code_extraction.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor
 6 | 
 7 | # Map each sample fixture to the set of symbol names we must find.
 8 | SAMPLE_EXPECTATIONS = {
 9 |     "python_sample.py": {"greet", "Greeter"},
10 |     "javascript_sample.js": {"greet", "Greeter"},
11 |     "typescript_sample.ts": {"greet", "Greeter"},
12 |     "tsx_sample.tsx": {"MyComponent"},
13 |     "go_sample.go": {"Greet", "Greeter"},
14 |     "rust_sample.rs": {"greet", "Greeter"},
15 |     "c_sample.c": {"greet"},
16 |     "ruby_sample.rb": {"greet", "Greeter"},
17 |     "java_sample.java": {"Greeter"},
18 | }
19 | 
20 | SAMPLES_DIR = Path(__file__).parent / "sample_code"
21 | 
22 | 
23 | @pytest.mark.parametrize("filename,expected", SAMPLE_EXPECTATIONS.items())
24 | def test_sample_symbol_extraction(filename: str, expected: set[str]):
25 |     path = SAMPLES_DIR / filename
26 |     assert path.exists(), f"Sample file missing: {path}"
27 | 
28 |     code = path.read_text()
29 |     ext = path.suffix  # includes the leading dot
30 | 
31 |     # Ensure we actually have support for this language in the environment.
32 |     parser = TreeSitterSymbolExtractor.get_parser(ext)
33 |     query = TreeSitterSymbolExtractor.get_query(ext)
34 |     if not parser or not query:
35 |         pytest.skip(f"Language for {ext} not supported in this environment")
36 | 
37 |     symbols = TreeSitterSymbolExtractor.extract_symbols(ext, code)
38 |     names = {s["name"] for s in symbols if "name" in s}
39 | 
40 |     missing = expected - names
41 |     assert not missing, f"Expected symbols not found in {filename}: {missing}"
42 | 


--------------------------------------------------------------------------------
/tests/test_summarizer.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from kit.summaries import LLMError, Summarizer
 6 | 
 7 | 
 8 | class FakeRepo:
 9 |     """Minimal fake Repository with in-memory file storage."""
10 | 
11 |     def __init__(self, files, local_path_str: str = "."):
12 |         self._files = files
13 |         self.local_path = Path(local_path_str)
14 | 
15 |     def get_file_content(self, path: str) -> str:
16 |         if path not in self._files:
17 |             raise FileNotFoundError(path)
18 |         return self._files[path]
19 | 
20 |     def get_abs_path(self, path: str) -> str:
21 |         # For FakeRepo, paths are already "absolute" in its context or not relevant.
22 |         return path
23 | 
24 | 
25 | # --- Helper fake OpenAI client --------------------------------------------
26 | 
27 | 
28 | class _FakeCompletion:
29 |     def __init__(self, content: str):
30 |         # Mimic OpenAI response object shape we access in Summarizer
31 |         self.choices = [type("_Choice", (), {"message": type("_Msg", (), {"content": content})()})]
32 |         self.usage = None
33 | 
34 | 
35 | class _FakeChatCompletions:
36 |     def __init__(self, content: str, raise_exc: bool = False):
37 |         self._content = content
38 |         self._raise = raise_exc
39 | 
40 |     def create(self, *args, **kwargs):
41 |         if self._raise:
42 |             raise RuntimeError("API down")
43 |         return _FakeCompletion(self._content)
44 | 
45 | 
46 | class FakeOpenAI:
47 |     """Mimics the parts of openai.OpenAI used by Summarizer."""
48 | 
49 |     def __init__(self, summary: str = "Fake summary", raise_exc: bool = False):
50 |         self.chat = type("_Chat", (), {"completions": _FakeChatCompletions(summary, raise_exc)})()
51 | 
52 | 
53 | # ---------------------------------------------------------------------------
54 | 
55 | 
56 | def test_summarize_file_happy():
57 |     repo = FakeRepo({"foo.py": "print('hello')"})
58 |     client = FakeOpenAI("This file prints hello")
59 |     summarizer = Summarizer(repo, llm_client=client)
60 |     summary = summarizer.summarize_file("foo.py")
61 |     assert summary == "This file prints hello"
62 | 
63 | 
64 | def test_summarize_file_not_found():
65 |     repo = FakeRepo({})
66 |     summarizer = Summarizer(repo, llm_client=FakeOpenAI())
67 |     with pytest.raises(FileNotFoundError):
68 |         summarizer.summarize_file("missing.py")
69 | 
70 | 
71 | def test_summarize_llm_error():
72 |     repo = FakeRepo({"bar.py": "print('x')"})
73 |     error_client = FakeOpenAI(raise_exc=True)
74 |     summarizer = Summarizer(repo, llm_client=error_client)
75 |     with pytest.raises(LLMError):
76 |         summarizer.summarize_file("bar.py")
77 | 


--------------------------------------------------------------------------------
/tests/test_symbol_extraction_multilang.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor
 4 | 
 5 | SAMPLES = {
 6 |     ".py": "def foo():\n    pass\n\nclass Bar:\n    pass\n",
 7 |     ".js": "function foo() {}\nclass Bar {}\n",
 8 |     ".go": "package main\n\nfunc foo() {}\n\ntype Bar struct{}\n",
 9 |     ".java": "class Bar { void foo() {} }\n",
10 |     ".rs": "fn foo() {}\nstruct Bar;\n",
11 | }
12 | 
13 | 
14 | @pytest.mark.parametrize("ext,code", list(SAMPLES.items()))
15 | def test_symbol_extraction(ext: str, code: str):
16 |     # Ensure tree-sitter has a parser+query for this extension
17 |     parser = TreeSitterSymbolExtractor.get_parser(ext)
18 |     query = TreeSitterSymbolExtractor.get_query(ext)
19 |     if not parser or not query:
20 |         pytest.skip(f"Language for {ext} not supported in this environment")
21 | 
22 |     symbols = TreeSitterSymbolExtractor.extract_symbols(ext, code)
23 |     assert symbols, f"No symbols extracted for {ext}"
24 | 
25 |     # Simple sanity: expect 'foo' OR 'Bar' present
26 |     names = {s.get("name") for s in symbols}
27 |     assert any(name in names for name in {"foo", "Bar", "main"}), f"Expected symbols missing for {ext}: {names}"
28 | 


--------------------------------------------------------------------------------
/tests/test_symbol_extractor_additional.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import tempfile
  3 | from pathlib import Path
  4 | 
  5 | import pytest
  6 | 
  7 | from kit import Repository, TreeSitterSymbolExtractor
  8 | 
  9 | # ------------------ Helpers ------------------
 10 | 
 11 | 
 12 | def _write_tmp_and_extract(tmpdir: str, filename: str, content: str):
 13 |     """Utility that writes *content* to *filename* inside *tmpdir* and extracts symbols."""
 14 |     path = os.path.join(tmpdir, filename)
 15 |     with open(path, "w", encoding="utf-8") as f:
 16 |         f.write(content)
 17 |     repo = Repository(tmpdir)
 18 |     return repo.extract_symbols(filename)
 19 | 
 20 | 
 21 | # ------------------ Language Smoke-Tests ------------------
 22 | 
 23 | 
 24 | @pytest.mark.parametrize(
 25 |     "fname,source,expected_names",
 26 |     [
 27 |         pytest.param(
 28 |             "sample.js",
 29 |             """function foo() { return 1; }\nclass Bar {\n  baz() {}\n}\n""",
 30 |             {"foo", "Bar"},
 31 |         ),
 32 |         pytest.param(
 33 |             "sample.tsx",
 34 |             """import React from 'react';\nfunction MyComponent() { return <div/>; }\nexport class Helper {}\n""",
 35 |             {"MyComponent", "Helper"},
 36 |         ),
 37 |         pytest.param(
 38 |             "sample.rs",
 39 |             """fn foo() {}\nstruct Bar { x: i32 }\nenum Baz { A, B }\n""",
 40 |             {"foo", "Bar", "Baz"},
 41 |         ),
 42 |         pytest.param(
 43 |             "sample.c",
 44 |             """int add(int a, int b) { return a + b; }\nstruct Point { int x; int y; };\nenum Color { RED, GREEN, BLUE };\n""",
 45 |             {"add", "Point", "Color"},
 46 |         ),
 47 |         pytest.param(
 48 |             "sample.rb",
 49 |             """class Foo\n  def bar; end\nend\nmodule Baz; end\ndef top_level; end\n""",
 50 |             {"Foo", "Baz", "bar"},
 51 |         ),
 52 |         pytest.param(
 53 |             "Sample.java",
 54 |             """public class MyClass {\n  public void foo() {}\n}\ninterface MyInterface {}\nenum MyEnum { A, B; }\n""",
 55 |             {"MyClass", "MyInterface", "MyEnum"},
 56 |         ),
 57 |     ],
 58 | )
 59 | def test_symbol_extraction_smoke(fname, source, expected_names):
 60 |     """Ensure we can extract *some* expected symbols from every supported language."""
 61 |     with tempfile.TemporaryDirectory() as tmpdir:
 62 |         symbols = _write_tmp_and_extract(tmpdir, fname, source)
 63 |         # Just check names, as types might vary by tree-sitter query details
 64 |         extracted_names = {s["name"] for s in symbols}
 65 |         for name in expected_names:
 66 |             assert name in extracted_names, f"Expected symbol named '{name}' in {extracted_names} for file {fname}"
 67 | 
 68 | 
 69 | # ------------------ Error-Handling Tests ------------------
 70 | 
 71 | 
 72 | def test_missing_tags_scm(monkeypatch):
 73 |     """Simulate missing *tags.scm* for Ruby and ensure extractor fails gracefully."""
 74 | 
 75 |     TreeSitterSymbolExtractor._queries.pop(".rb", None)
 76 | 
 77 |     # Mock the read_text method to raise FileNotFoundError for ruby/tags.scm
 78 |     original_read_text = Path.read_text
 79 | 
 80 |     def _mock_read_text(self, *args, **kwargs):
 81 |         if "ruby" in str(self) and self.name == "tags.scm":
 82 |             raise FileNotFoundError(f"Simulated missing file: {self}")
 83 |         return original_read_text(self, *args, **kwargs)
 84 | 
 85 |     monkeypatch.setattr(Path, "read_text", _mock_read_text)
 86 | 
 87 |     symbols = TreeSitterSymbolExtractor.extract_symbols(".rb", "class Foo; end")
 88 |     assert symbols == [], "Expected empty symbol list when tags.scm is missing"
 89 | 
 90 | 
 91 | def test_corrupt_tags_scm(monkeypatch):
 92 |     """Simulate corrupt *tags.scm* content for Rust and ensure extractor fails gracefully."""
 93 | 
 94 |     TreeSitterSymbolExtractor._queries.pop(".rs", None)
 95 | 
 96 |     # Mock the read_text method to return invalid query content
 97 |     original_read_text = Path.read_text
 98 | 
 99 |     def _mock_read_text(self, *args, **kwargs):
100 |         if "rust" in str(self) and self.name == "tags.scm":
101 |             return "this is not valid tree-sitter query"
102 |         return original_read_text(self, *args, **kwargs)
103 | 
104 |     monkeypatch.setattr(Path, "read_text", _mock_read_text)
105 | 
106 |     symbols = TreeSitterSymbolExtractor.extract_symbols(".rs", "fn foo() {}")
107 |     assert symbols == [], "Expected empty symbol list when tags.scm is corrupt"
108 | 
109 | 
110 | def test_unsupported_extension():
111 |     symbols = TreeSitterSymbolExtractor.extract_symbols(".xyz", "whatever")
112 |     assert symbols == [], "Expected empty symbol list for unsupported extension"
113 | 


--------------------------------------------------------------------------------
/tests/test_tool_schemas.py:
--------------------------------------------------------------------------------
 1 | import kit
 2 | 
 3 | 
 4 | def test_get_tool_schemas_basic():
 5 |     """Ensure get_tool_schemas returns a non-empty list of JSON-serialisable dicts."""
 6 |     schemas = kit.get_tool_schemas()
 7 | 
 8 |     # Basic shape checks
 9 |     assert isinstance(schemas, list) and schemas, "Expected non-empty list"
10 |     assert all(isinstance(s, dict) for s in schemas), "Every schema should be a dict"
11 | 
12 |     # Required top-level keys from MCP Tool (name + inputSchema at minimum)
13 |     sample = schemas[0]
14 |     assert "name" in sample and "inputSchema" in sample, "Schema missing expected keys"
15 | 
16 |     # Spot-check a few known tool names
17 |     tool_names = {s["name"] for s in schemas}
18 |     for expected in {"open_repository", "search_code", "get_file_tree", "get_file_content"}:
19 |         assert expected in tool_names, f"{expected} should be part of tool schema list"
20 | 


--------------------------------------------------------------------------------
/tests/test_tree_sitter_languages.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from tree_sitter_language_pack import get_parser
 3 | 
 4 | LANG_SAMPLES = {
 5 |     "python": b"def foo():\n    return 42\n",
 6 |     "javascript": b"function foo() { return 42; }\n",
 7 |     "typescript": b"function foo(): number { return 42; }\n",
 8 |     "tsx": b"const foo = <T extends unknown>() => <div />;\n",
 9 |     "go": b"func foo() int { return 42 }\n",
10 |     "rust": b"fn foo() -> i32 { 42 }\n",
11 |     "hcl": b'variable "foo" { default = 42 }\n',
12 |     "c": b"int foo() { return 42; }\n",
13 | }
14 | 
15 | 
16 | @pytest.mark.parametrize("lang,src", LANG_SAMPLES.items())
17 | def test_parser_root_node(lang, src):
18 |     parser = get_parser(lang)
19 |     tree = parser.parse(src)
20 |     root = tree.root_node
21 |     assert root is not None
22 |     # Root node type should be non-empty string
23 |     assert isinstance(root.type, str) and root.type
24 |     # Should have at least one child node
25 |     assert root.child_count > 0
26 |     # Print for debug
27 |     print(f"{lang} root: {root.type}, children: {root.child_count}")
28 | 


--------------------------------------------------------------------------------
/tests/test_typescript_symbol_extraction.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from kit import Repository
 6 | 
 7 | 
 8 | def test_typescript_symbol_extraction(tmp_path: Path):
 9 |     # Minimal TypeScript code with a function and a class
10 |     ts_code = """
11 | function foo() {}
12 | class Bar {}
13 | """
14 |     ts_file = tmp_path / "example.ts"
15 |     ts_file.write_text(ts_code)
16 |     repository = Repository(str(tmp_path))
17 |     try:
18 |         symbols = repository.extract_symbols("example.ts")
19 |     except Exception as e:
20 |         pytest.fail(f"Symbol extraction failed: {e}")
21 |     names_types = {(s.get("name"), s.get("type")) for s in symbols}
22 |     assert ("foo", "function") in names_types
23 |     assert ("Bar", "class") in names_types
24 | 


--------------------------------------------------------------------------------