├── .prettierignore
├── mise.toml
├── .gitmodules
├── src
│   ├── indexer
│   │   ├── codeintel
│   │   │   ├── rust
│   │   │   │   └── index.ts
│   │   │   ├── php
│   │   │   │   └── index.ts
│   │   │   ├── dart
│   │   │   │   ├── index.ts
│   │   │   │   └── adapter.ts
│   │   │   ├── java
│   │   │   │   └── index.ts
│   │   │   ├── swift
│   │   │   │   └── index.ts
│   │   │   ├── typescript
│   │   │   │   └── index.ts
│   │   │   ├── index.ts
│   │   │   └── types.ts
│   │   ├── language.ts
│   │   ├── queue.ts
│   │   ├── dart
│   │   │   ├── config.ts
│   │   │   └── pathKey.ts
│   │   ├── codeintel.ts
│   │   ├── migrations
│   │   │   └── repo-merger.ts
│   │   └── git.ts
│   ├── client
│   │   └── index.ts
│   ├── shared
│   │   ├── index.ts
│   │   ├── adaptive-k-categories.ts
│   │   ├── adaptive-k.ts
│   │   ├── utils
│   │   │   ├── glob.ts
│   │   │   ├── validation.ts
│   │   │   ├── retry.ts
│   │   │   └── simpleYaml.ts
│   │   ├── fs
│   │   │   └── safePath.ts
│   │   ├── config-validate-adaptive-k.ts
│   │   ├── embedding.ts
│   │   └── security
│   │       └── config.ts
│   ├── index.ts
│   ├── server
│   │   ├── bootstrap.ts
│   │   ├── services
│   │   │   ├── index.ts
│   │   │   └── repo-resolver.ts
│   │   ├── observability
│   │   │   └── tracing.ts
│   │   ├── context.ts
│   │   └── abbreviations.ts
│   └── eval
│       └── metrics.ts
├── prettier.config.cjs
├── sql
│   └── schema.sql
├── examples
│   ├── claude-code-config.json
│   └── codex-mcp-config.json
├── .mcp.json
├── types
│   ├── index.d.ts
│   ├── opentelemetry.d.ts
│   └── duckdb.d.ts
├── config
│   ├── security.yml
│   ├── denylist.yml
│   ├── default.example.yml
│   ├── kiri.yml
│   └── domain-terms.yml
├── docs
│   ├── formal
│   │   ├── PathPenaltyMerge.cfg
│   │   ├── PathPenaltyMerge-max3.cfg
│   │   ├── language-support
│   │   │   ├── PlanB_Pool.cfg
│   │   │   ├── PlanA_CentralRegistry.cfg
│   │   │   ├── PlanC_Capability.cfg
│   │   │   ├── PlanC_CapabilityComposition.als
│   │   │   └── PlanB_HierarchicalBackend.als
│   │   ├── AdaptiveK.cfg
│   │   ├── AdaptiveK-exp.cfg
│   │   ├── AdaptiveK-prod.cfg
│   │   ├── PathPenaltyEncoding.tla
│   │   ├── AdaptiveK.tla
│   │   ├── README.md
│   │   └── adaptive-k.md
│   ├── principles.md
│   ├── dev
│   │   ├── node-version.md
│   │   └── path-penalties.md
│   ├── processes
│   │   └── security-review.md
│   ├── user
│   │   ├── path-penalties.ja.md
│   │   └── path-penalties.md
│   ├── operations.md
│   ├── runbook.md
│   ├── doc_index.yaml
│   └── overview.md
├── tsconfig.build.json
├── .gitignore
├── baselines
│   └── vscode-golden
│       ├── 2025-11-15T05-31-33-856Z
│       │   └── snapshot.json
│       └── manifest.json
├── tests
│   ├── server
│   │   ├── scoring-profiles.spec.ts
│   │   ├── main.cli.spec.ts
│   │   ├── audit.spec.ts
│   │   ├── domain-terms.spec.ts
│   │   ├── fts-status-cache.spec.ts
│   │   ├── degrade.spec.ts
│   │   ├── config-loader.spec.ts
│   │   └── resolve-repo.spec.ts
│   ├── daemon
│   │   └── daemon.cli.spec.ts
│   ├── client
│   │   ├── proxy.cli.spec.ts
│   │   ├── cli.security.spec.ts
│   │   └── proxy.spec.ts
│   ├── indexer
│   │   ├── dart
│   │   │   ├── __fixtures__
│   │   │   │   ├── outline-basic.json
│   │   │   │   └── outline-nested.json
│   │   │   └── test-helpers.ts
│   │   └── path-normalization.spec.ts
│   ├── shared
│   │   ├── tokenizer.spec.ts
│   │   ├── masker.spec.ts
│   │   ├── adaptive-k.spec.ts
│   │   └── cli
│   │       └── testHelpers.ts
│   ├── helpers
│   │   ├── db-setup.ts
│   │   ├── test-repo.ts
│   │   ├── fixtures.ts
│   │   └── migration-setup.ts
│   ├── eval
│   │   ├── metrics.spec.ts
│   │   ├── results
│   │   │   └── 2025-11-17-docs-plain.md
│   │   └── goldens
│   │       ├── queries-core4.yaml
│   │       └── baseline.json
│   └── integration
│       └── security.lock.integration.spec.ts
├── scripts
│   ├── update-deps.ts
│   ├── check-adaptive-k.ts
│   ├── diag
│   │   ├── health.ts
│   │   ├── cleanup-hints.ts
│   │   ├── build-hint-dictionary.ts
│   │   ├── query-terms.ts
│   │   └── dump-hints.ts
│   ├── docs
│   │   ├── scan.mjs
│   │   └── lint.mjs
│   ├── setup-golden.sh
│   ├── build
│   │   └── copy-assets.ts
│   ├── audit
│   │   └── export-log.ts
│   ├── assay
│   │   ├── plugins
│   │   │   └── context-coverage-metric.ts
│   │   └── kiri-variants.ts
│   ├── diag.ts
│   ├── datasets
│   │   └── fix-expected-section.ts
│   └── add-schema-hints.ts
├── tsconfig.json
├── vitest.config.ts
├── .eslintrc.cjs
├── .shirushi.yml
├── LICENSE
├── assay.config.ts
├── eslint.config.js
└── .github
    └── workflows
        └── ci.yml

--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
external/assay-kit/
tmp/

--------------------------------------------------------------------------------
/mise.toml:
--------------------------------------------------------------------------------
[tools]
node = "24"
pnpm = "latest"

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "external/assay-kit"]
  path = external/assay-kit
  url = git@github.com:CAPHTECH/assay-kit.git

--------------------------------------------------------------------------------
/src/indexer/codeintel/rust/index.ts:
--------------------------------------------------------------------------------
/**
 * Rust Analyzer Module
 */

export { RustAnalyzer, createRustAnalyzer } from "./analyzer.js";

--------------------------------------------------------------------------------
/prettier.config.cjs:
--------------------------------------------------------------------------------
module.exports = {
  singleQuote: false,
  trailingComma: "es5",
  tabWidth: 2,
  semi: true,
  printWidth: 100
};

--------------------------------------------------------------------------------
/sql/schema.sql:
--------------------------------------------------------------------------------
-- DuckDB schema placeholder for KIRI
CREATE TABLE IF NOT EXISTS repo (
  id INTEGER,
  root TEXT,
  indexed_at TIMESTAMP
);

--------------------------------------------------------------------------------
/src/indexer/codeintel/php/index.ts:
--------------------------------------------------------------------------------
/**
 * PHP Analyzer Module
 *
 * Exports the language analyzer for PHP files
 */

export { PHPAnalyzer, createPHPAnalyzer } from "./analyzer.js";

--------------------------------------------------------------------------------
/examples/claude-code-config.json:
--------------------------------------------------------------------------------
{
  "mcpServers": {
    "kiri": {
      "command": "kiri",
      "args": ["--repo", ".", "--db", ".kiri/index.duckdb"]
    }
  }
}

--------------------------------------------------------------------------------
/src/indexer/codeintel/dart/index.ts:
--------------------------------------------------------------------------------
/**
 * Dart Analyzer Module
 *
 * Exports the language analyzer for Dart files
 */

export { DartAnalyzer, createDartAnalyzer } from "./adapter.js";

--------------------------------------------------------------------------------
/src/indexer/codeintel/java/index.ts:
--------------------------------------------------------------------------------
/**
 * Java Analyzer Module
 *
 * Exports the language analyzer for Java files
 */

export { JavaAnalyzer, createJavaAnalyzer } from "./analyzer.js";

--------------------------------------------------------------------------------
/src/indexer/codeintel/swift/index.ts:
--------------------------------------------------------------------------------
/**
 * Swift Analyzer Module
 *
 * Exports the language analyzer for Swift files
 */

export { SwiftAnalyzer, createSwiftAnalyzer } from "./analyzer.js";
--------------------------------------------------------------------------------
/src/indexer/codeintel/typescript/index.ts:
--------------------------------------------------------------------------------
/**
 * TypeScript Analyzer Module
 *
 * Exports the language analyzer for TypeScript/TSX files
 */

export { TypeScriptAnalyzer, createTypeScriptAnalyzer } from "./analyzer.js";

--------------------------------------------------------------------------------
/.mcp.json:
--------------------------------------------------------------------------------
{
  "mcpServers": {
    "pce-memory": {
      "type": "stdio",
      "command": "npx",
      "args": ["pce-memory@latest", "--db", "~/.pce/kiri.db"],
      "env": {}
    }
  }
}

--------------------------------------------------------------------------------
/types/index.d.ts:
--------------------------------------------------------------------------------
export interface Snippet {
  path: string;
  range: [number, number];
  symbols: string[];
}

export interface BundleExplanation {
  reason: string;
  weight: number;
}

--------------------------------------------------------------------------------
/src/client/index.ts:
--------------------------------------------------------------------------------
export function buildContextBundleRequest(
  snippets: Array<{ path: string; lines: number[] }>
): string {
  // TODO: align with MCP client schema
  return JSON.stringify({ snippets }, null, 2);
}

--------------------------------------------------------------------------------
/config/security.yml:
--------------------------------------------------------------------------------
# Security baseline for KIRI MCP deployment
allowed_paths:
  - ./
  - ../shared
allow_network_egress: false
allow_subprocess: false
sensitive_tokens:
  - "sk-"
  - "ghp_"
  - "-----BEGIN"

--------------------------------------------------------------------------------
/examples/codex-mcp-config.json:
--------------------------------------------------------------------------------
{
  "mcpServers": {
    "kiri": {
      "command": "kiri",
      "args": [
        "--repo",
        "/path/to/your/project",
        "--db",
        "/path/to/your/project/.kiri/index.duckdb"
      ]
    }
  }
}

--------------------------------------------------------------------------------
/config/denylist.yml:
--------------------------------------------------------------------------------
# Paths that must never be indexed or surfaced
patterns:
  - secrets/**
  - "*.pem"
  - .env*
  # Binary and archive files
  - "*.zip"
  - "*.tar"
  - "*.tar.gz"
  - "*.tgz"
  - "*.rar"
  - "*.7z"
  - "*.jar"
  - "*.war"
  - "*.ear"

--------------------------------------------------------------------------------
/docs/formal/PathPenaltyMerge.cfg:
--------------------------------------------------------------------------------
SPECIFICATION Spec
CONSTANTS
  MaxSeqLenYaml = 2
  MaxSeqLenEnv = 2
  MaxSeqLenProfile = 2
INVARIANTS
  MergePrecedenceInvariant
  LongestPrefixInvariant
  ScanMatchesInvariant
  EncodeDecodeInvariant
PROPERTIES
  EventuallyConsistent

--------------------------------------------------------------------------------
/docs/formal/PathPenaltyMerge-max3.cfg:
--------------------------------------------------------------------------------
SPECIFICATION Spec
CONSTANTS
  MaxSeqLenYaml = 3
  MaxSeqLenEnv = 2
  MaxSeqLenProfile = 2
INVARIANTS
  MergePrecedenceInvariant
  LongestPrefixInvariant
  ScanMatchesInvariant
  EncodeDecodeInvariant
PROPERTIES
  EventuallyConsistent

--------------------------------------------------------------------------------
/src/shared/index.ts:
--------------------------------------------------------------------------------
export function assertCondition(condition: unknown, message: string): asserts condition {
  if (!condition) {
    throw new Error(message);
  }
}

export function notImplemented(feature: string): never {
  throw new Error(`Feature not implemented: ${feature}`);
}

--------------------------------------------------------------------------------
/tsconfig.build.json:
--------------------------------------------------------------------------------
{
  "extends": "./tsconfig.json",
  "compilerOptions": {
    "noEmit": false,
    "allowImportingTsExtensions": false,
    "outDir": "dist",
    "sourceMap": true,
    "declaration": true,
    "declarationMap": true
  },
  "exclude": ["tests", "scripts", "var", "dist"]
}

--------------------------------------------------------------------------------
/docs/formal/language-support/PlanB_Pool.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  Languages = {"ts", "swift", "dart"}
  Files = {"f1", "f2"}
  MaxPoolSize = 3

SPECIFICATION Spec

INVARIANT TypeOK
INVARIANT PoolSizeLimit
INVARIANT OnlyInUseClientsLock
INVARIANT AvailableNoLocks
INVARIANT MutualExclusion
INVARIANT ClientLanguageMatch

--------------------------------------------------------------------------------
/docs/formal/language-support/PlanA_CentralRegistry.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  Languages = {"ts", "swift", "php"}
  Files = {"f1", "f2"}
  Analyzers = {"a1", "a2", "a3"}

SPECIFICATION Spec

INVARIANT TypeOK
INVARIANT MutualExclusion
INVARIANT LockLanguageConsistency
INVARIANT RegistryUniqueness
INVARIANT LockRegistryConsistency

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules/
dist/
var/
tmp/
external/vscode/
external/*/.kiri/
*.log
.env*
.DS_Store
coverage/
.claude/settings.local.json
.kiri/
package/
*.tgz

# Development test scripts
test-*.mjs
debug-*.mjs
scripts/debug/
.serena/
tests/fixtures/sample-repo/.git

# Alloy verification output (temporary artifacts)
Plan*_*/

--------------------------------------------------------------------------------
/docs/formal/language-support/PlanC_Capability.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  Languages = {"ts", "swift"}
  Files = {"f1", "f2"}
  Capabilities = {"symbol", "type", "doc"}
  RequiredCaps = {"symbol"}

SPECIFICATION Spec

INVARIANT TypeOK
INVARIANT MinimumCapabilities
INVARIANT ProviderLanguageMatch
INVARIANT LockLanguageConsistency
INVARIANT MutualExclusion
INVARIANT CapabilityTraceability
--------------------------------------------------------------------------------
/docs/formal/AdaptiveK.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  CATEGORIES = {"bugfix", "integration", "testfail", "performance", "generic", "metrics"}
  ALLOWED_SET = {5, 10, 20}
  K_MIN = 3
  K_MAX = 50
  K_BUGFIX = 5
  K_INTEGRATION = 5
  K_TESTFAIL = 20
  K_PERFORMANCE = 20
  K_DEFAULT = 10
INIT Init
NEXT Next
INVARIANTS InvAllowedSet InvRange InvBugfixPrecision InvIntegrationPrecision InvTestfailRecall InvPerformanceRecall InvGenericBalance

--------------------------------------------------------------------------------
/docs/formal/AdaptiveK-exp.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  CATEGORIES = {"bugfix", "integration", "testfail", "performance", "generic", "metrics"}
  ALLOWED_SET = {5, 10, 15, 20}
  K_MIN = 3
  K_MAX = 50
  K_BUGFIX = 5
  K_INTEGRATION = 5
  K_TESTFAIL = 20
  K_PERFORMANCE = 20
  K_DEFAULT = 15
INIT Init
NEXT Next
INVARIANTS InvAllowedSet InvRange InvBugfixPrecision InvIntegrationPrecision InvTestfailRecall InvPerformanceRecall InvGenericBalance

--------------------------------------------------------------------------------
/docs/formal/AdaptiveK-prod.cfg:
--------------------------------------------------------------------------------
CONSTANTS
  CATEGORIES = {"bugfix", "integration", "testfail", "performance", "generic", "metrics"}
  ALLOWED_SET = {5, 10, 20}
  K_MIN = 3
  K_MAX = 50
  K_BUGFIX = 5
  K_INTEGRATION = 5
  K_TESTFAIL = 20
  K_PERFORMANCE = 20
  K_DEFAULT = 10
INIT Init
NEXT Next
INVARIANTS InvAllowedSet InvRange InvBugfixPrecision InvIntegrationPrecision InvTestfailRecall InvPerformanceRecall InvGenericBalance

--------------------------------------------------------------------------------
/baselines/vscode-golden/2025-11-15T05-31-33-856Z/snapshot.json:
--------------------------------------------------------------------------------
{
  "metrics": {
    "latencyMs": 2206.416667,
    "precision": 0.15,
    "timeToFirstUseful": 0,
    "extras": {
      "tokenSavingsRatio": 0.957083,
      "hintCoverage": 0.2,
      "avgTokensEstimate": 30195.416667,
      "avgBaselineTokens": 501540.583333
    }
  },
  "metadata": {
    "source": "var/eval/latest.metrics.json",
    "timestamp": "2025-11-15T05:31:33.474Z"
  }
}

--------------------------------------------------------------------------------
/baselines/vscode-golden/manifest.json:
--------------------------------------------------------------------------------
{
  "targetId": "vscode-golden",
  "versions": [
    {
      "id": "2025-11-15T05-31-33-856Z",
      "createdAt": "2025-11-15T05:31:33.856Z",
      "status": "active",
      "snapshotPath": "/Users/rizumita/Workspace/CAPHTECH.public/kiri/baselines/vscode-golden/2025-11-15T05-31-33-856Z/snapshot.json",
      "notes": "VS Code golden baseline (2025-11-15)"
    }
  ],
  "revision": "08603bee356b40ce07b711d9a676ed24b7a591c0606f2d1d385cb322923e75b4"
}

--------------------------------------------------------------------------------
/src/shared/adaptive-k-categories.ts:
--------------------------------------------------------------------------------
export const ADAPTIVE_K_CATEGORIES = [
  "bugfix",
  "testfail",
  "debug",
  "api",
  "docs",
  "feature",
  "integration",
  "performance",
] as const;

export type AdaptiveKCategory = (typeof ADAPTIVE_K_CATEGORIES)[number];

export const ADAPTIVE_K_CATEGORY_SET = new Set(ADAPTIVE_K_CATEGORIES);

export const ADAPTIVE_K_CATEGORY_ALIASES: Record<string, AdaptiveKCategory> = {
  editor: "feature",
  infra: "integration",
  "docs-plain": "docs",
};

--------------------------------------------------------------------------------
/types/opentelemetry.d.ts:
--------------------------------------------------------------------------------
declare module "@opentelemetry/api" {
  export interface SpanStatus {
    code: number;
    message?: string;
  }

  export interface Span {
    setAttribute(key: string, value: unknown): void;
    recordException(error: unknown): void;
    setStatus(status: SpanStatus): void;
    end(): void;
  }

  export interface Tracer {
    startActiveSpan<T>(name: string, fn: (span: Span) => Promise<T>): Promise<T>;
  }

  export const trace: {
    getTracer(name: string): Tracer;
  };
}

--------------------------------------------------------------------------------
/docs/principles.md:
--------------------------------------------------------------------------------
---
doc_id: "PLAN-002"
title: "Development Principles and Open Issues"
category: "planning"
tags:
  - principles
  - guidelines
  - issues
service: "kiri"
---

# Development Principles and Open Issues

## Working agreements

- **Degrade first**: Always maintain a minimal configuration that does not depend on extensions, and layer additional features on incrementally.
- **Metadata before embeddings**: Cover 80% of needs with the three axes of string matching, dependencies, and proximity; keep embeddings as a supplement.
- **Reproducibility**: Fix random seeds so that ranking results are deterministic for identical inputs.

## Open questions / risks

- **TypeScript module boundaries**: Conventions are needed to prevent false hits on `common/` in monorepos.
- **Embedding update cost**: Must decide between recomputing only diffs or settling for low-frequency batch updates.
- **Large binary influx**: Thresholds and workflows for automatic exclusion and detection need continuous improvement.

--------------------------------------------------------------------------------
/src/shared/adaptive-k.ts:
--------------------------------------------------------------------------------
export interface AdaptiveKConfig {
  enabled: boolean;
  allowedSet: number[];
  kMin: number;
  kMax: number;
  kMap: Record<string, number>;
  kDefault: number;
  kWhenDisabled: number;
}

export type QueryCategory = string | undefined;

export function getAdaptiveK(category: QueryCategory, config: AdaptiveKConfig): number {
  if (!config.enabled) {
    return config.kWhenDisabled;
  }
  const value =
    category !== undefined ? (config.kMap[category] ?? config.kDefault) : config.kDefault;
  return value;
}
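A minimal usage sketch of `getAdaptiveK`, with K values mirroring `docs/formal/AdaptiveK-prod.cfg`; the concrete config object below is illustrative, not a shipped default:

```typescript
import { getAdaptiveK, type AdaptiveKConfig } from "./adaptive-k.js";

// Assumed config: values copied from AdaptiveK-prod.cfg for illustration.
const config: AdaptiveKConfig = {
  enabled: true,
  allowedSet: [5, 10, 20],
  kMin: 3,
  kMax: 50,
  kMap: { bugfix: 5, integration: 5, testfail: 20, performance: 20 },
  kDefault: 10,
  kWhenDisabled: 10,
};

getAdaptiveK("bugfix", config);   // => 5  (precision-oriented category)
getAdaptiveK("testfail", config); // => 20 (recall-oriented category)
getAdaptiveK(undefined, config);  // => 10 (no category: falls back to kDefault)
getAdaptiveK("bugfix", { ...config, enabled: false }); // => 10 (kWhenDisabled)
```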
--------------------------------------------------------------------------------
/tests/server/scoring-profiles.spec.ts:
--------------------------------------------------------------------------------
import { describe, it, expect } from "vitest";

import { loadScoringProfile } from "../../src/server/scoring.js";

describe("scoring profiles", () => {
  it("default profile stays within regression guardrails", () => {
    const profile = loadScoringProfile("default");
    expect(profile.textMatch).toBeGreaterThanOrEqual(0.8);
    expect(profile.docPenaltyMultiplier).toBeLessThanOrEqual(0.7);
    expect(profile.configPenaltyMultiplier).toBeLessThanOrEqual(0.1);
    expect(profile.implBoostMultiplier).toBeGreaterThanOrEqual(1);
  });
});

--------------------------------------------------------------------------------
/docs/dev/node-version.md:
--------------------------------------------------------------------------------
---
doc_id: "GUIDE-005"
title: "Node Version and Toolchain"
category: "development"
tags:
  - node
  - toolchain
  - setup
service: "kiri"
---

# Node Version and Toolchain

- Recommended Node: **20.x (LTS)**
- `v20` is already recorded in `.nvmrc` / `.node-version`.
- When using mise, node=20 / pnpm=9 are defined in `.mise.toml`; fetch them with `mise install`.
- Enable Corepack and use pnpm@9:
  - `corepack enable`
  - `corepack prepare pnpm@9 --activate`
- On Node 25 no prebuilt duckdb binary is available and `duckdb.node` cannot be found, so tests fail. Always run `pnpm install`, `pnpm run build`, and `pnpm test` on Node 20.

--------------------------------------------------------------------------------
/scripts/update-deps.ts:
--------------------------------------------------------------------------------
import { spawnSync } from "node:child_process";

export function updateDependencies(extraArgs: string[] = []): number {
  const result = spawnSync("pnpm", ["up", "--latest", ...extraArgs], {
    stdio: "inherit",
  });
  return result.status ?? 1;
}

const executedDirectly =
  typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1];

if (executedDirectly) {
  const status = updateDependencies(process.argv.slice(2));
  if (status !== 0) {
    console.error("Failed to update dependencies");
  }
  process.exitCode = status;
}

--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "lib": ["ES2022"],
    "strict": true,
    "noUncheckedIndexedAccess": true,
    "noImplicitOverride": true,
    "exactOptionalPropertyTypes": true,
    "allowImportingTsExtensions": true,
    "allowSyntheticDefaultImports": true,
    "esModuleInterop": true,
    "resolveJsonModule": true,
    "noEmit": true,
    "skipLibCheck": true,
    "types": ["node", "vitest"]
  },
  "include": ["src", "tests", "scripts", "types"],
  "exclude": ["dist", "var", "scripts/assay"]
}

--------------------------------------------------------------------------------
/config/default.example.yml:
--------------------------------------------------------------------------------
mcp:
  port: 8765
  host: 127.0.0.1
  tools:
    - context_bundle
    - files_search

# Tokenization configuration for keyword extraction
tokenization:
  # Strategy: "phrase-aware" (default), "legacy", or "hybrid"
  # - phrase-aware: Preserves hyphenated terms (e.g., "page-agent" stays as one unit)
  # - legacy: Splits on hyphens (e.g., "page-agent" → ["page", "agent"])
  # - hybrid: Emits both phrases and split keywords
  strategy: "phrase-aware"

indexer:
  repoRoot: "../../target-repo"
  database: "var/index.duckdb"
  exclude:
    - "**/node_modules/**"
    - "**/dist/**"

--------------------------------------------------------------------------------
/vitest.config.ts:
--------------------------------------------------------------------------------
import { defineConfig } from "vitest/config";

export default defineConfig({
  test: {
    globals: true,
    include: ["tests/**/*.spec.ts"],
    exclude: ["external/**"],
    // Force sequential test execution to avoid lock file conflicts
    // Tests use file-based locking which cannot safely run in parallel
    pool: "forks",
    poolOptions: {
      forks: {
        singleFork: true,
        maxForks: 1,
        minForks: 1,
      },
    },
    coverage: {
      provider: "v8",
      reporter: ["text", "lcov"],
      lines: 0.8,
      statements: 0.8,
    },
  },
});

--------------------------------------------------------------------------------
/config/kiri.yml:
--------------------------------------------------------------------------------
path_penalties:
  # Reduce noisy auxiliary packages
  - prefix: ".eslint-plugin-local/"
    multiplier: 0.1
  - prefix: "cli/"
    multiplier: 0.25
  - prefix: "cli/src/"
    multiplier: 0.2
  - prefix: "extensions/"
    multiplier: 0.15
  - prefix: "tests/eval/"
    multiplier: 0.05
  - prefix: "datasets/"
    multiplier: 0.05
  - prefix: "tests/"
    multiplier: 0.3
  - prefix: "examples/"
    multiplier: 0.1
  - prefix: "fixtures/"
    multiplier: 0.1
  - prefix: "testdata/"
    multiplier: 0.1
  # Suppress repo-local baseline fixtures that surface via fallback
  - prefix: "packages/assay-kit/baseline-tests"
    multiplier: 0.05

--------------------------------------------------------------------------------
/scripts/check-adaptive-k.ts:
--------------------------------------------------------------------------------
#!/usr/bin/env ts-node
import { loadServerConfig } from "../src/server/config.js";

function main(): void {
  try {
    const cfg = loadServerConfig();
    // Already validated in loadServerConfig; reaching here means OK.
    console.log("adaptiveK config OK", {
      enabled: cfg.adaptiveK.enabled,
      allowedSet: cfg.adaptiveK.allowedSet,
      kMin: cfg.adaptiveK.kMin,
      kMax: cfg.adaptiveK.kMax,
      kDefault: cfg.adaptiveK.kDefault,
      kWhenDisabled: cfg.adaptiveK.kWhenDisabled,
    });
    process.exit(0);
  } catch (error) {
    console.error("adaptiveK config validation failed", error);
    process.exit(1);
  }
}

main();

--------------------------------------------------------------------------------
/scripts/diag/health.ts:
--------------------------------------------------------------------------------
import { request } from "node:http";

export interface HealthReport {
  metricsReachable: boolean;
  latencyMs: number | null;
}

export async function checkHealth(
  url = process.env.KIRI_METRICS_URL ?? "http://127.0.0.1:8765/metrics"
): Promise<HealthReport> {
  const started = Date.now();
  return await new Promise<HealthReport>((resolve) => {
    const req = request(url, (res) => {
      res.resume();
      res.on("end", () => {
        resolve({ metricsReachable: res.statusCode === 200, latencyMs: Date.now() - started });
      });
    });
    req.on("error", () => {
      resolve({ metricsReachable: false, latencyMs: null });
    });
    req.end();
  });
}

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
export { DuckDBClient, type DuckDBClientOptions } from "./shared/duckdb.js";
export {
  startDaemon,
  stopDaemon,
  isDaemonRunning,
  type StartDaemonOptions,
} from "./client/start-daemon.js";
export { buildContextBundleRequest } from "./client/index.js";
export { bootstrapServer, type BootstrapOptions } from "./server/bootstrap.js";
export {
  createServerRuntime,
  type CommonServerOptions,
  type ServerRuntime,
} from "./server/runtime.js";
export { startServer, type ServerOptions } from "./server/main.js";
export { ensureDatabaseIndexed } from "./server/indexBootstrap.js";
export { IndexWatcher, type IndexWatcherOptions } from "./indexer/watch.js";
export { runIndexer } from "./indexer/cli.js";
export { DaemonLifecycle } from "./daemon/lifecycle.js";

--------------------------------------------------------------------------------
/src/shared/utils/glob.ts:
--------------------------------------------------------------------------------
/**
 * Simple glob matching - supports `*` and multi-level `**`
 */
export function matchesGlob(path: string, pattern: string): boolean {
  if (!pattern) {
    return false;
  }

  const normalizedPath = path.replace(/^[.][/\\]/u, "");
  const normalizedPattern = pattern.replace(/^[.][/\\]/u, "");

  let regexPattern = normalizedPattern
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*\*\//g, "DOUBLESTAR_SLASH")
    .replace(/\/\*\*/g, "SLASH_DOUBLESTAR")
    .replace(/\*\*/g, "DOUBLESTAR")
    .replace(/\*/g, "[^/]*")
    .replace(/DOUBLESTAR_SLASH/g, "(?:.*/)?")
    .replace(/SLASH_DOUBLESTAR/g, "(?:/.*)?")
    .replace(/DOUBLESTAR/g, ".*");

  regexPattern = `^${regexPattern}$`;
  return new RegExp(regexPattern).test(normalizedPath);
}
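A quick sketch of how `matchesGlob` behaves for the pattern shapes used in `config/denylist.yml` and the indexer excludes; the expected values follow from the regex translation above (assumed from reading the code, not taken from a test file):

```typescript
import { matchesGlob } from "./glob.js";

matchesGlob("secrets/api/key.txt", "secrets/**");       // true  ("**" spans directories)
matchesGlob("certs/server.pem", "*.pem");               // false ("*" does not cross "/")
matchesGlob("server.pem", "*.pem");                     // true
matchesGlob("packages/app/dist/main.js", "**/dist/**"); // true  (both "**" are optional spans)
matchesGlob("./src/index.ts", "src/*.ts");              // true  (leading "./" is normalized away)
```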
--------------------------------------------------------------------------------
/src/shared/utils/validation.ts:
--------------------------------------------------------------------------------
/**
 * Validation utilities for CLI arguments and user input
 */

/**
 * Parse a string to a positive integer with validation
 *
 * @param value - String value to parse
 * @param defaultValue - Default value if input is undefined
 * @param name - Parameter name for error messages
 * @returns Parsed integer
 * @throws Error if value is not a valid positive integer
 */
export function parsePositiveInt(
  value: string | undefined,
  defaultValue: number,
  name: string
): number {
  if (!value) return defaultValue;

  const parsed = parseInt(value, 10);
  if (Number.isNaN(parsed) || parsed <= 0) {
    throw new Error(
      `Invalid ${name}: "${value}". Expected a positive integer. Use default ${defaultValue} or specify a valid number.`
    );
  }
  return parsed;
}
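Illustrative calls (the parameter name and values are hypothetical):

```typescript
import { parsePositiveInt } from "./validation.js";

parsePositiveInt(undefined, 300, "debounce"); // => 300 (default applied)
parsePositiveInt("500", 300, "debounce");     // => 500
parsePositiveInt("-1", 300, "debounce");      // throws: Invalid debounce: "-1". ...
```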
--------------------------------------------------------------------------------
/src/indexer/language.ts:
--------------------------------------------------------------------------------
const LANGUAGE_BY_EXTENSION: Record<string, string> = {
  ".ts": "TypeScript",
  ".tsx": "TypeScript",
  ".js": "JavaScript",
  ".jsx": "JavaScript",
  ".json": "JSON",
  ".md": "Markdown",
  ".yml": "YAML",
  ".yaml": "YAML",
  ".py": "Python",
  ".rs": "Rust",
  ".go": "Go",
  ".java": "Java",
  ".rb": "Ruby",
  ".c": "C",
  ".h": "C",
  ".cpp": "C++",
  ".hpp": "C++",
  ".cc": "C++",
  ".hh": "C++",
  ".cs": "C#",
  ".dart": "Dart",
  ".php": "PHP",
  ".swift": "Swift",
  ".kt": "Kotlin",
  ".m": "Objective-C",
  ".mm": "Objective-C++",
  ".scala": "Scala",
  ".sh": "Shell",
};

export function detectLanguage(extension: string): string | null {
  const normalized = extension.toLowerCase();
  return LANGUAGE_BY_EXTENSION[normalized] ?? null;
}

--------------------------------------------------------------------------------
/.eslintrc.cjs:
--------------------------------------------------------------------------------
module.exports = {
  root: true,
  parser: "@typescript-eslint/parser",
  parserOptions: {
    project: false,
    tsconfigRootDir: __dirname,
  },
  plugins: ["@typescript-eslint", "import"],
  extends: ["eslint:recommended", "plugin:@typescript-eslint/recommended", "plugin:import/recommended", "plugin:import/typescript", "prettier"],
  env: {
    es2022: true,
    node: true
  },
  rules: {
    "import/order": [
      "warn",
      {
        "alphabetize": { "order": "asc", "caseInsensitive": true },
        "newlines-between": "always"
      }
    ],
    // TypeScript handles these checks, so we disable ESLint's JS-only version
    "no-undef": "off",
    "@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_", "varsIgnorePattern": "^_" }]
  },
  ignorePatterns: ["dist", "var"]
};

--------------------------------------------------------------------------------
/tests/server/main.cli.spec.ts:
--------------------------------------------------------------------------------
/**
 * Integration tests for kiri-server CLI
 */

import { dirname, join } from "path";
import { fileURLToPath } from "url";

import { createCliTests } from "../shared/cli/testHelpers.js";

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const serverPath = join(__dirname, "../../dist/src/server/main.js");

const cliTests = createCliTests({
  cliPath: serverPath,
  commandName: "kiri-server",
  description: "KIRI MCP Server",
  expectedSections: [
    "Repository / Database:",
    "--repo",
    "--db",
    "Server Mode:",
    "--port",
    "Indexing:",
    "--reindex",
    "--allow-degrade",
    "Watch Mode:",
    "--watch",
    "--debounce",
    "Security:",
    "--security-config",
  ],
});

cliTests.runAll();

--------------------------------------------------------------------------------
/tests/daemon/daemon.cli.spec.ts:
--------------------------------------------------------------------------------
/**
 * Integration tests for kiri-daemon CLI
 */

import { dirname, join } from "path";
import { fileURLToPath } from "url";

import { createCliTests } from "../shared/cli/testHelpers.js";

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const daemonPath = join(__dirname, "../../dist/src/daemon/daemon.js");

const cliTests = createCliTests({
  cliPath: daemonPath,
  commandName: "kiri-daemon",
  description: "KIRI Daemon Process",
  expectedSections: [
    "Repository / Database:",
    "--repo",
    "--db",
    "Daemon Lifecycle:",
    "--socket-path",
    "--daemon-timeout",
    "Watch Mode:",
    "--watch",
    "--debounce",
    "Security:",
    "--allow-degrade",
    "--security-config",
  ],
});

cliTests.runAll();

--------------------------------------------------------------------------------
/tests/client/proxy.cli.spec.ts:
--------------------------------------------------------------------------------
/**
 * Integration tests for kiri proxy CLI
 */

import { dirname, join } from "path";
import { fileURLToPath } from "url";

import { createCliTests } from "../shared/cli/testHelpers.js";

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const proxyPath = join(__dirname, "../../dist/src/client/proxy.js");

const cliTests = createCliTests({
  cliPath: proxyPath,
  commandName: "kiri",
  description: "KIRI MCP Client Proxy",
  expectedSections: [
    "Repository / Database:",
    "--repo",
    "--db",
    "Daemon Connection:",
    "--socket-path",
    "Watch Mode:",
    "--watch",
    "--debounce",
    "Security:",
    "--allow-degrade",
    "--security-config",
    "Indexing:",
    "--full",
  ],
});

cliTests.runAll();

--------------------------------------------------------------------------------
/.shirushi.yml:
--------------------------------------------------------------------------------
# Shirushi Document ID Configuration for KIRI
#
# ID format: {KIND}-{SER3} (e.g., ARCH-001, ADR-001, RUN-001)
# Manages unique identifiers for documents and detects changes

doc_globs:
  - "docs/**/*.md"
  - "!docs/formal/**" # exclude TLA+/Alloy formal specs
  - "!docs/eval-*.md" # exclude evaluation reports
  - "!docs/refactoring-analysis-*.md" # exclude analysis reports

index_file: "docs/doc_index.yaml"
id_field: "doc_id"
id_format: "{KIND}-{SER3}"

dimensions:
  KIND:
    type: enum
    values:
      - ARCH # Architecture - core design and reference
      - ADR # Architecture Decision Record - design decisions
      - RUN # Runbook - operational procedures
      - GUIDE # Guide - user/developer guides
      - SEC # Security - security documents
      - TEST # Testing - test strategy
      - PLAN # Planning - roadmap and plans
  SER3:
    type: serial
    digits: 3
    scope: ["KIND"]

forbid_id_change: true
allow_missing_id_in_new_files: false

--------------------------------------------------------------------------------
/docs/processes/security-review.md:
--------------------------------------------------------------------------------
---
doc_id: "SEC-002"
title: "Security Review Procedure"
category: "security"
tags:
  - security
  - review
  - process
service: "kiri"
---

# Security Review Procedure

## Purpose of the weekly review

- Inventory the export logs and confirm that sensitive information is masked.
- Track the diff between the denylist and `.gitignore`, and verify that no unwanted files are being indexed.
- Check for degrade events and identify actions to prevent recurrence.

## Checklist

1. Run `pnpm exec tsx scripts/diag.ts check-denylist` and confirm the diff is empty.
2. Open the latest logs under `var/audit/` and confirm that `***` masking is applied to `path` / `rationale`.
3. Record `kiri_mask_applied_total` and `kiri_denylist_hits_total` from `/metrics` and keep the week-over-week change as a trend.
4. Run `pnpm exec tsx src/client/cli.ts security verify --db <path>` and confirm `state: MATCH`.
5. If a serious anomaly is found, open an incident ticket and respond following the runbook.

## Record template

- Review date:
- Reviewer:
- Denylist diff:
- Masked count (vs. previous review):
- Degrade events:
- Follow-up actions:

--------------------------------------------------------------------------------
/config/domain-terms.yml:
--------------------------------------------------------------------------------
statistics:
  - mann-whitney-u:
      aliases:
        - mannWhitneyU
        - wilcoxon
        - rank-sum
      files:
        - src/stats/mann.ts
        - external/assay-kit/src/stats/mann-whitney.ts
  - rank-biserial:
      aliases:
        - rankBiserialEffect
        - effect-size
        - correlation
      files:
        - src/stats/rank-biserial.ts
        - external/assay-kit/src/stats/rank-biserial.ts

orchestration:
  - tuning-orchestrator:
      aliases:
        - TuningOrchestrator
        - parameter-tuning
        - optimization-orchestrator
        - orchestrator
      files:
        - src/tuning/orchestrator.ts
        - runner/index.ts

architecture:
  - plugin-registry:
      aliases:
        - PluginRegistry
        - plugin-system
        - registry
      files:
        - src/plugins/registry.ts

--------------------------------------------------------------------------------
/scripts/docs/scan.mjs:
--------------------------------------------------------------------------------
#!/usr/bin/env node
/**
 * Shirushi CLI wrapper for document scanning
 *
 * Workaround: shirushi's CLI uses import.meta.url check which fails
 * with symlinked node_modules on macOS due to path canonicalization.
 * This wrapper explicitly calls run() to bypass the check.
 */

import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';
import { realpathSync } from 'fs';

// Set up argv for subcommand
process.argv = ['node', 'shirushi', 'scan', ...process.argv.slice(2)];

// Resolve the actual path to shirushi CLI (follows symlinks)
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const projectRoot = resolve(__dirname, '../..');
const shirushiCliPath = realpathSync(
  resolve(projectRoot, 'node_modules/shirushi/dist/cli/index.js')
);

// Dynamic import using file URL to match import.meta.url check
const { run } = await import(`file://${shirushiCliPath}`);
run();
--------------------------------------------------------------------------------
/scripts/docs/lint.mjs:
--------------------------------------------------------------------------------
#!/usr/bin/env node
/**
 * Shirushi CLI wrapper for document ID validation
 *
 * Workaround: shirushi's CLI uses import.meta.url check which fails
 * with symlinked node_modules on macOS due to path canonicalization.
 * This wrapper explicitly calls run() to bypass the check.
 */

import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';
import { realpathSync } from 'fs';

// Set up argv for subcommand
process.argv = ['node', 'shirushi', 'lint', ...process.argv.slice(2)];

// Resolve the actual path to shirushi CLI (follows symlinks)
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const projectRoot = resolve(__dirname, '../..');
const shirushiCliPath = realpathSync(
  resolve(projectRoot, 'node_modules/shirushi/dist/cli/index.js')
);

// Dynamic import using file URL to match import.meta.url check
const { run } = await import(`file://${shirushiCliPath}`);
run();

--------------------------------------------------------------------------------
/src/server/bootstrap.ts:
--------------------------------------------------------------------------------
import {
  assertSecurityBaseline,
  evaluateSecurityStatus,
  updateSecurityLock,
} from "../shared/security/config.js";

export interface BootstrapOptions {
  securityConfigPath?: string;
  securityLockPath?: string;
  allowWriteLock?: boolean;
}

export interface BootstrapReport {
  security: ReturnType<typeof evaluateSecurityStatus>;
}

export function bootstrapServer(options: BootstrapOptions = {}): BootstrapReport {
  const security = evaluateSecurityStatus(options.securityConfigPath, options.securityLockPath);
  if (!security.matches) {
    if (!security.lockHash && options.allowWriteLock) {
      updateSecurityLock(security.hash, options.securityLockPath);
      return {
        security: {
          ...security,
          lockHash: security.hash,
          matches: true,
        },
      };
    }
    assertSecurityBaseline(options.securityConfigPath, options.securityLockPath);
  }
  return { security };
}

--------------------------------------------------------------------------------
/src/shared/fs/safePath.ts:
--------------------------------------------------------------------------------
import { resolve, relative, sep } from "node:path";
import process from "node:process";

interface SafePathOptions {
  baseDir?: string;
  allowOutsideBase?: boolean;
}

export function resolveSafePath(inputPath: string, options?: SafePathOptions): string {
  if (!inputPath || typeof inputPath !== "string") {
    throw new Error("Path must be a non-empty string");
  }

  const trimmed = inputPath.trim();
  const allowOutsideBase = options?.allowOutsideBase ?? false;
  const baseDir = resolve(options?.baseDir ?? process.cwd());

  const resolved = resolve(baseDir, trimmed);

  if (allowOutsideBase) {
    return resolved;
  }

  const relativePath = relative(baseDir, resolved);
  if (relativePath === "" || relativePath === ".") {
    return resolved;
  }

  if (relativePath.startsWith(`..${sep}`) || relativePath === "..") {
    throw new Error(`Path traversal attempt detected: ${inputPath}`);
  }

  return resolved;
}
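A short sketch of the traversal guard in action (the base directory and paths are hypothetical):

```typescript
import { resolveSafePath } from "./safePath.js";

// Stays inside the base directory: returns the absolute path.
resolveSafePath("var/index.duckdb", { baseDir: "/repo" }); // => "/repo/var/index.duckdb"

// Escapes the base directory: throws "Path traversal attempt detected: ...".
resolveSafePath("../outside.txt", { baseDir: "/repo" });

// Explicit opt-out for trusted callers, e.g. the ../shared entry in security.yml.
resolveSafePath("../shared/config.yml", { baseDir: "/repo", allowOutsideBase: true });
```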
--------------------------------------------------------------------------------
/docs/dev/path-penalties.md:
--------------------------------------------------------------------------------
---
doc_id: "GUIDE-004"
title: "Path Penalties Developer Notes"
category: "configuration"
tags:
  - path-penalty
  - dev-notes
  - caching
service: "kiri"
---

# Path Multiplier Penalty Developer Notes

## Load path and caching

- Implementation: `src/server/config-loader.ts`
- Merge order: `boost_profile` defaults → environment variables → `.kiri/config.yaml` (last wins)
- Caching: `loadPathPenalties` is memoized on the key `(cwd, baseMultipliers, env snapshot)`. The handler side caches the merged array per profile.
- To apply configuration changes immediately, **a process restart is required** (to discard the in-memory cache). Hot reload is not supported.

## Normalization rules

- `\` is converted to `/` (POSIX form).
- Leading `/` and drive letters are stripped; paths containing `..` are rejected with an error.
- A trailing `/` is kept only if present in the input.

## Tests

- Unit: `tests/server/config-loader.spec.ts` (merge order, normalization, error cases)
- Integration: YAML/env scenarios added to `tests/server/context.bundle.spec.ts`.

## Notes for extension

- When adding `PathMultiplier` entries, keep the sort order so the longest-prefix-match logic is not broken.
- The cache key JSON-serializes the contents of `basePathMultipliers`, so passing a mutable array may cause unexpected cache misses. Pass immutable arrays as a rule.
- When changing Windows-specific behavior, update the normalization spec and the user guide (`docs/user/path-penalties.md`) together.
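The notes above reference the longest-prefix-match logic; the following is a minimal sketch of that selection rule. The `PathMultiplier` shape and the `pickMultiplier` helper are assumptions based on these notes, not the actual `config-loader.ts` code:

```typescript
interface PathMultiplier {
  prefix: string; // normalized, POSIX-style, repository-relative
  multiplier: number;
}

// Pick the multiplier of the longest matching prefix; 1.0 when nothing matches.
function pickMultiplier(path: string, penalties: PathMultiplier[]): number {
  // Sorting longest-first makes "cli/src/" win over "cli/" for "cli/src/main.ts".
  const sorted = [...penalties].sort((a, b) => b.prefix.length - a.prefix.length);
  const hit = sorted.find((p) => path.startsWith(p.prefix));
  return hit?.multiplier ?? 1.0;
}

pickMultiplier("cli/src/main.ts", [
  { prefix: "cli/", multiplier: 0.25 },
  { prefix: "cli/src/", multiplier: 0.2 },
]); // => 0.2 (longest prefix wins, matching the entries in config/kiri.yml)
```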
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 CAPHTECH

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/tests/server/audit.spec.ts:
--------------------------------------------------------------------------------
import { mkdtemp, readFile, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";

import { describe, expect, it } from "vitest";

import { exportAuditLog } from "../../scripts/audit/export-log.js";

describe("exportAuditLog", () => {
  it("masks sensitive tokens in audit output", async () => {
    const dir = await mkdtemp(join(tmpdir(), "audit-test-"));
    const output = join(dir, "audit.json");
    const entries = [
      {
        path: "secrets/sk-1234567890ABCDE",
        range: [1, 2] as [number, number],
        rationale: "Detected ghp_1234567890ABCDE during scan",
      },
    ];

    const file = exportAuditLog(entries, output);
    const content = await readFile(file, "utf8");
    const parsed = JSON.parse(content) as { entries: Array<{ path: string; rationale: string }> };

    expect(parsed.entries[0]?.path).toContain("***");
    expect(parsed.entries[0]?.rationale).toContain("***");

    await rm(dir, { recursive: true, force: true });
  });
});

--------------------------------------------------------------------------------
/scripts/setup-golden.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -euo pipefail

# Setup script for golden-set evaluation.
# - Clones VS Code repo to external/vscode (shallow) if missing.
# - Builds DuckDB index (.kiri/index.duckdb).
# - Generates security lock file for the DB.
#
# Prerequisites: pnpm, Node 20, tsx available (assumes pnpm install has been run).

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
REPO_PATH="$ROOT_DIR/external/vscode"
DB_PATH="$REPO_PATH/.kiri/index.duckdb"
LOCK_PATH="$REPO_PATH/.kiri/security.lock"

if [[ ! -d "$REPO_PATH/.git" ]]; then
  echo "[setup-golden] Cloning vscode..."
  git clone --depth 1 https://github.com/microsoft/vscode.git "$REPO_PATH"
else
  echo "[setup-golden] vscode repo already present. Skipping clone."
fi

mkdir -p "$REPO_PATH/.kiri"

echo "[setup-golden] Building DuckDB index at $DB_PATH ..."
pnpm exec tsx src/indexer/cli.ts --repo "$REPO_PATH" --db "$DB_PATH" --full

echo "[setup-golden] Generating security lock ..."
pnpm exec tsx src/client/cli.ts security verify --db "$DB_PATH" --security-lock "$LOCK_PATH" --write-lock

echo "[setup-golden] Done. You can now run: pnpm run eval:golden"
--------------------------------------------------------------------------------
/types/duckdb.d.ts:
--------------------------------------------------------------------------------
declare module "duckdb" {
  type RunCallback = (err: Error | null) => void;
  type AllCallback<T> = (err: Error | null, rows: T[]) => void;

  export interface Statement {
    run(callback: RunCallback): void;
    run(params: unknown[], callback: RunCallback): void;
    all<T = Record<string, unknown>>(callback: AllCallback<T>): void;
    all<T = Record<string, unknown>>(params: unknown[], callback: AllCallback<T>): void;
    finalize(callback: RunCallback): void;
  }

  export class Database {
    constructor(path: string);
    run(sql: string, callback: RunCallback): void;
    run(sql: string, params: unknown[], callback: RunCallback): void;
    all<T = Record<string, unknown>>(sql: string, callback: AllCallback<T>): void;
    all<T = Record<string, unknown>>(
      sql: string,
      params: unknown[],
      callback: AllCallback<T>
    ): void;
    exec(sql: string, callback: RunCallback): void;
    prepare(sql: string, callback: (err: Error | null, statement: Statement) => void): void;
    close(callback: RunCallback): void;
  }

  const duckdb: {
    Database: typeof Database;
  };

  export default duckdb;
}
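The declaration file above types a callback-style API; below is a small sketch of wrapping `Database.all` in a Promise. It is illustrative only; the repo's actual wrapper lives in `src/shared/duckdb.ts` as `DuckDBClient`, whose internals are not shown here:

```typescript
import duckdb from "duckdb";
import type { Database } from "duckdb";

// Promisify the callback-based all() declared above.
function queryAll<T extends Record<string, unknown>>(
  db: Database,
  sql: string,
  params: unknown[] = []
): Promise<T[]> {
  return new Promise((resolve, reject) => {
    db.all<T>(sql, params, (err, rows) => (err ? reject(err) : resolve(rows)));
  });
}

const db = new duckdb.Database(".kiri/index.duckdb");
const repos = await queryAll<{ id: number; root: string }>(db, "SELECT id, root FROM repo");
```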
--------------------------------------------------------------------------------
/scripts/build/copy-assets.ts:
--------------------------------------------------------------------------------
import { cp, mkdir, rm } from "node:fs/promises";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";

async function copyAssetDirectory(
  sourceRelativePath: string,
  destinationRelativePath: string
): Promise<void> {
  const projectRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
  const sourcePath = resolve(projectRoot, sourceRelativePath);
  const destinationPath = resolve(projectRoot, destinationRelativePath);

  await mkdir(dirname(destinationPath), { recursive: true });
  await rm(destinationPath, { recursive: true, force: true });
  await cp(sourcePath, destinationPath, { recursive: true });
}

async function main(): Promise<void> {
  await mkdir(resolve(dirname(fileURLToPath(import.meta.url)), "../../dist"), { recursive: true });

  await copyAssetDirectory("config", "dist/config");
  await copyAssetDirectory("sql", "dist/sql");
}

main().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(
    `Failed to copy assets. Ensure source directories exist and re-run build. ${message}`
  );
  process.exitCode = 1;
});

--------------------------------------------------------------------------------
/scripts/audit/export-log.ts:
--------------------------------------------------------------------------------
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";

import { evaluateSecurityStatus } from "../../src/shared/security/config.js";
import { maskValue } from "../../src/shared/security/masker.js";

export interface AuditEntry {
  path: string;
  range: [number, number];
  rationale: string;
}

export function exportAuditLog(entries: AuditEntry[], outputPath: string): string {
  const { config } = evaluateSecurityStatus();
  const masked = maskValue(entries, { tokens: config.sensitive_tokens });
  const absolute = resolve(process.cwd(), outputPath);
  mkdirSync(dirname(absolute), { recursive: true });
  writeFileSync(
    absolute,
    JSON.stringify({ exportedAt: new Date().toISOString(), entries: masked.masked }, null, 2)
  );
  return absolute;
}

const executedDirectly =
  typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1];

if (executedDirectly) {
  const sample: AuditEntry[] = [
    { path: "src/server/main.ts", range: [1, 20], rationale: "Review of MCP startup flow" },
  ];
  const output = exportAuditLog(sample, process.argv[2] ?? "var/audit/sample-log.json");
  console.info(`Audit log written: ${output}`);
}

--------------------------------------------------------------------------------
/tests/indexer/dart/__fixtures__/outline-basic.json:
--------------------------------------------------------------------------------
{
  "kind": "COMPILATION_UNIT",
  "offset": 0,
  "length": 180,
  "element": {
    "kind": "COMPILATION_UNIT",
    "name": "example.dart"
  },
  "children": [
    {
      "kind": "CLASS",
      "offset": 0,
      "length": 120,
      "element": {
        "kind": "CLASS",
        "name": "Greeter",
        "dartdoc": "A simple greeter class",
        "typeParameters": ""
      },
      "children": [
        {
          "kind": "METHOD",
          "offset": 30,
          "length": 50,
          "element": {
            "kind": "METHOD",
            "name": "sayHello",
            "parameters": "(String name)",
            "returnType": "String"
          }
        },
        {
          "kind": "FIELD",
          "offset": 85,
          "length": 20,
          "element": {
            "kind": "FIELD",
            "name": "greeting"
          }
        }
      ]
    },
    {
      "kind": "FUNCTION",
      "offset": 125,
      "length": 55,
      "element": {
        "kind": "FUNCTION",
        "name": "main",
        "parameters": "()",
        "returnType": "void"
      }
    }
  ]
}

--------------------------------------------------------------------------------
/docs/user/path-penalties.ja.md:
--------------------------------------------------------------------------------
---
doc_id: "GUIDE-003"
title: "Path Penalties User Guide"
category: "configuration"
tags:
  - path-penalty
  - boosting
  - config
service: "kiri"
---

# Path Multiplier Penalty Configuration Guide (Japanese edition)

## Purpose

Configure `path_penalties` to boost or suppress repository-specific important/unimportant directories in scoring.

## Configuration (recommended: YAML)

Create `.kiri/config.yaml` at the repository root:

```yaml
path_penalties:
  - prefix: src/
    multiplier: 1.4 # boost src
  - prefix: external/
    multiplier: 0.3 # suppress external
```

## Overriding via environment variables

- Format: `KIRI_PATH_PENALTY_<encoded-prefix>=<multiplier>`
- `/` is encoded as `__`. Example: `KIRI_PATH_PENALTY_src__api__=0.8`

## Precedence (last wins)

`boost_profile` definitions < environment variables < `.kiri/config.yaml`

## Normalization rules

- `\` is converted to `/` (unified POSIX form).
- `../` and drive letters (such as `C:\`) are removed (only repository-relative paths are accepted).
- A trailing `/` is kept only if present in the input.

## When settings take effect

- Loaded when the KIRI server starts.
- If `.kiri/config.yaml` or environment variables change after the process starts, **the server/daemon must be restarted** (values are cached).

## Minimal verification steps

1. Configure `.kiri/config.yaml`
2. Restart the server/daemon (e.g., `kiri --repo . --db .kiri/index.duckdb --watch`)
3. Use `context_bundle` / `files_search` and confirm that rankings change by path

## Troubleshooting

- Example error: `Path penalty prefix "..." must not contain ".."`
  → Paths containing `..` are rejected. Specify repository-relative paths.
- Changes not applied: restart the server after configuration changes (caching is in effect).
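A tiny sketch of the `/` ↔ `__` encoding described above, mirroring the `Encode`/`Decode` operators in `docs/formal/PathPenaltyEncoding.tla`; the helper names are hypothetical:

```typescript
// "src/api/" -> "src__api__", as used in KIRI_PATH_PENALTY_src__api__=0.8
const encodePrefix = (prefix: string): string => prefix.replaceAll("/", "__");
const decodePrefix = (encoded: string): string => encoded.replaceAll("__", "/");

encodePrefix("src/api/");   // => "src__api__"
decodePrefix("src__api__"); // => "src/api/"
```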
--------------------------------------------------------------------------------
/src/indexer/queue.ts:
--------------------------------------------------------------------------------
import PQueue from "p-queue";

/**
 * DuckDB single-writer constraint: queue indexer runs per databasePath
 *
 * Background:
 * - DuckDB supports only one writer per database file
 * - Adding fts_status/generation runs ALTER TABLE plus a full-table UPDATE, extending connection time
 * - Running multiple indexers in parallel caused catalog conflicts, losing the repo table or data
 *
 * Solution:
 * - Serialize runIndexer() calls that target the same databasePath
 * - Exclusive control per file via p-queue (concurrency: 1)
 *
 * @see Critical Review Fix #5 - Concurrency test failures
 */

const queueMap = new Map<string, PQueue>();

/**
 * Get the queue for the given databasePath (created with concurrency=1 on first use)
 *
 * @param databasePath - DuckDB file path
 * @returns the queue dedicated to that databasePath
 */
export function getIndexerQueue(databasePath: string): PQueue {
  if (!queueMap.has(databasePath)) {
    queueMap.set(databasePath, new PQueue({ concurrency: 1 }));
  }
  return queueMap.get(databasePath)!;
}

/**
 * For tests: clear the queue for a specific databasePath
 *
 * @param databasePath - path whose queue should be cleared
 */
export function clearQueue(databasePath: string): void {
  const queue = queueMap.get(databasePath);
  if (queue) {
    queue.clear();
    queueMap.delete(databasePath);
  }
}

/**
 * For tests: clear all queues
 */
export function clearAllQueues(): void {
  for (const queue of queueMap.values()) {
    queue.clear();
  }
  queueMap.clear();
}
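A sketch of how the queue serializes two concurrent index runs against one database file; `runIndexer`'s exact options shape lives in `src/indexer/cli.ts` and is assumed here:

```typescript
import { getIndexerQueue } from "./queue.js";
import { runIndexer } from "./cli.js";

const dbPath = ".kiri/index.duckdb";
const queue = getIndexerQueue(dbPath);

// Both calls target the same DuckDB file; the queue (concurrency: 1)
// runs them one after another instead of racing the single writer.
await Promise.all([
  queue.add(() => runIndexer({ repo: ".", db: dbPath })),
  queue.add(() => runIndexer({ repo: ".", db: dbPath, full: true })),
]);
```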
0.5, 48 | label: "TTFU (s)", 49 | }, 50 | ], 51 | fallback: "fail", 52 | }, 53 | }, 54 | }, 55 | }; 56 | 57 | export default config; 58 | -------------------------------------------------------------------------------- /tests/shared/tokenizer.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from "vitest"; 2 | 3 | import { tokenizeText } from "../../src/shared/tokenizer.js"; 4 | 5 | describe("tokenizeText", () => { 6 | it("retains hyphenated phrases while exposing split parts", () => { 7 | const tokens = tokenizeText("Mann-Whitney U test", "phrase-aware"); 8 | expect(tokens).toEqual(expect.arrayContaining(["mann-whitney", "mann", "whitney", "test"])); 9 | }); 10 | 11 | it("splits snake_case identifiers", () => { 12 | const tokens = tokenizeText("rank_biserial_effect", "phrase-aware"); 13 | expect(tokens).toEqual( 14 | expect.arrayContaining(["rank_biserial_effect", "rank", "biserial", "effect"]) 15 | ); 16 | }); 17 | 18 | it("splits camelCase identifiers", () => { 19 | const tokens = tokenizeText("workerPoolScheduler handler", "phrase-aware"); 20 | expect(tokens).toEqual( 21 | expect.arrayContaining(["workerpoolscheduler", "worker", "pool", "scheduler", "handler"]) 22 | ); 23 | }); 24 | 25 | it("splits alphanumeric boundaries", () => { 26 | const tokens = tokenizeText("ISO8601Parser", "phrase-aware"); 27 | expect(tokens).toEqual(expect.arrayContaining(["iso8601parser", "iso", "8601", "parser"])); 28 | }); 29 | 30 | it("preserves legacy behavior by excluding hyphenated phrases", () => { 31 | const tokens = tokenizeText("foo-bar baz", "legacy"); 32 | expect(tokens).toEqual(expect.arrayContaining(["foo", "bar", "baz"])); 33 | expect(tokens).not.toContain("foo-bar"); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /tests/helpers/db-setup.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, rm } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 6 | 7 | /** 8 | * Create a temporary DuckDB database for testing 9 | * 10 | * @returns Object with db instance, temp directory path, and cleanup function 11 | */ 12 | export async function createTestDb(): Promise<{ 13 | db: DuckDBClient; 14 | tempDir: string; 15 | dbPath: string; 16 | cleanup: () => Promise<void>; 17 | }> { 18 | const tempDir = await mkdtemp(join(tmpdir(), "kiri-test-")); 19 | const dbPath = join(tempDir, "test.duckdb"); 20 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 21 | 22 | return { 23 | db, 24 | tempDir, 25 | dbPath, 26 | cleanup: async () => { 27 | await db.close(); 28 | await rm(tempDir, { recursive: true, force: true }); 29 | }, 30 | }; 31 | } 32 | 33 | /** 34 | * Create a temporary database path (without connecting) 35 | * Useful for tests that need to control DB connection lifecycle 36 | * 37 | * @returns Object with path and cleanup function 38 | */ 39 | export async function createTempDbPath(): Promise<{ 40 | path: string; 41 | cleanup: () => Promise<void>; 42 | }> { 43 | const dir = await mkdtemp(join(tmpdir(), "kiri-bootstrap-db-")); 44 | const dbPath = join(dir, "index.duckdb"); 45 | return { 46 | path: dbPath, 47 | cleanup: async () => { 48 | await rm(dir, { recursive: true, force: true }); 49 | }, 50 | }; 51 | } 52 | --------------------------------------------------------------------------------
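A minimal usage sketch for the `createTestDb` helper above (illustrative only, not a file in this repo; it assumes the same vitest setup the surrounding tests use):

```ts
import { describe, expect, it } from "vitest";

import { createTestDb } from "../helpers/db-setup.js";

describe("db-setup sketch", () => {
  it("provides an isolated DuckDB file per test", async () => {
    const { db, dbPath, cleanup } = await createTestDb();
    try {
      // Each call creates its own temp directory, so parallel tests cannot collide.
      expect(dbPath.endsWith("test.duckdb")).toBe(true);
      const rows = await db.all<{ n: number }>("SELECT 1 AS n");
      expect(rows).toHaveLength(1);
    } finally {
      await cleanup(); // closes the connection and removes the temp directory
    }
  });
});
```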
/docs/formal/PathPenaltyEncoding.tla: -------------------------------------------------------------------------------- 1 | ---- MODULE PathPenaltyEncoding ---- 2 | EXTENDS Naturals, Sequences 3 | 4 | (* 5 | Utility operators for encoding/decoding path prefixes and handling 6 | platform-specific normalization. Separated from PathPenaltyMerge so the 7 | encoding logic can be tested or replaced independently (e.g., Alloy models). 8 | *) 9 | 10 | Slash == "/" 11 | Underscore == "_" 12 | 13 | TailStr(str) == 14 | IF Len(str) <= 1 15 | THEN "" 16 | ELSE SubSeq(str, 2, Len(str)) 17 | 18 | DropChars(str, n) == 19 | IF Len(str) <= n 20 | THEN "" 21 | ELSE SubSeq(str, n + 1, Len(str)) 22 | 23 | RECURSIVE EncodeStr(_) 24 | EncodeStr(str) == 25 | IF Len(str) = 0 26 | THEN "" 27 | ELSE 28 | LET ch == SubSeq(str, 1, 1) 29 | IN 30 | IF ch = Slash 31 | THEN "__" \o EncodeStr(TailStr(str)) 32 | ELSE ch \o EncodeStr(TailStr(str)) 33 | 34 | Encode(prefix) == EncodeStr(prefix) 35 | 36 | RECURSIVE DecodeStr(_) 37 | DecodeStr(str) == 38 | IF Len(str) = 0 39 | THEN "" 40 | ELSE 41 | LET first == SubSeq(str, 1, 1) 42 | IN 43 | IF first = Underscore /\ Len(str) >= 2 /\ SubSeq(str, 2, 2) = Underscore 44 | THEN Slash \o DecodeStr(DropChars(str, 2)) 45 | ELSE first \o DecodeStr(TailStr(str)) 46 | 47 | Decode(encoded) == DecodeStr(encoded) 48 | 49 | (* 50 | Placeholder for platform normalization. Production code resolves ".", "..", 51 | Windows drive letters, and "\" separators. Here we treat prefixes as already 52 | normalized to keep the state space finite while still documenting the contract. 53 | *) 54 | OSNormalize(prefix) == prefix 55 | 56 | ==== 57 | -------------------------------------------------------------------------------- /src/indexer/codeintel/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Language Analyzer System - Public API 3 | * 4 | * このモジュールは言語アナライザーシステムの公開APIを提供します。 5 | * 6 | * 使用例: 7 | * ```typescript 8 | * import { LanguageRegistry, type LanguageAnalyzer } from './codeintel/index.js'; 9 | * 10 | * const registry = LanguageRegistry.getInstance(); 11 | * registry.register(new TypeScriptAnalyzer()); 12 | * 13 | * const result = await registry.analyze('TypeScript', { 14 | * pathInRepo: 'src/index.ts', 15 | * content: '...', 16 | * fileSet: new Set(['src/index.ts']), 17 | * }); 18 | * ``` 19 | */ 20 | 21 | // Types 22 | export type { 23 | SymbolRecord, 24 | SnippetRecord, 25 | DependencyRecord, 26 | AnalysisContext, 27 | AnalysisResult, 28 | LanguageAnalyzer, 29 | } from "./types.js"; 30 | 31 | export { emptyResult } from "./types.js"; 32 | 33 | // Registry 34 | export { LanguageRegistry } from "./registry.js"; 35 | 36 | // Utilities 37 | export { 38 | treeSitterPointToLine, 39 | sanitizeTreeSitterSignature, 40 | assignSymbolIds, 41 | symbolsToSnippets, 42 | createDependencyRecorder, 43 | buildLineStartsArray, 44 | offsetToLine, 45 | cleanDocComment, 46 | buildFallbackSnippet, 47 | } from "./utils.js"; 48 | 49 | // Language Analyzers 50 | export { TypeScriptAnalyzer, createTypeScriptAnalyzer } from "./typescript/index.js"; 51 | export { SwiftAnalyzer, createSwiftAnalyzer } from "./swift/index.js"; 52 | export { PHPAnalyzer, createPHPAnalyzer } from "./php/index.js"; 53 | export { JavaAnalyzer, createJavaAnalyzer } from "./java/index.js"; 54 | export { DartAnalyzer, createDartAnalyzer } from "./dart/index.js"; 55 | export { RustAnalyzer, createRustAnalyzer } from "./rust/index.js"; 56 | 
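// --- Illustrative wiring of the public API above (documentation sketch; not part of the original file) ---
// const registry = LanguageRegistry.getInstance();
// registry.register(createTypeScriptAnalyzer());
// registry.register(createRustAnalyzer());
// const result = await registry.analyze("TypeScript", {
//   pathInRepo: "src/index.ts",
//   content: sourceText, // `sourceText` is an assumed variable holding the file contents
//   fileSet: new Set(["src/index.ts"]),
// });
// // `result` is an AnalysisResult carrying symbols, snippets, and dependencies.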
-------------------------------------------------------------------------------- /tests/helpers/test-repo.ts: -------------------------------------------------------------------------------- 1 | import { execFile } from "node:child_process"; 2 | import { realpathSync } from "node:fs"; 3 | import { mkdtemp, rm, writeFile, mkdir } from "node:fs/promises"; 4 | import { tmpdir } from "node:os"; 5 | import { dirname, join } from "node:path"; 6 | import { promisify } from "node:util"; 7 | 8 | const execFileAsync = promisify(execFile); 9 | 10 | export interface TempRepo { 11 | path: string; 12 | cleanup: () => Promise<void>; 13 | } 14 | 15 | export async function createTempRepo(files: Record<string, string>): Promise<TempRepo> { 16 | const prefix = join(tmpdir(), "kiri-repo-"); 17 | const repoDir = await mkdtemp(prefix); 18 | 19 | await execFileAsync("git", ["init"], { cwd: repoDir }); 20 | await execFileAsync("git", ["config", "user.email", "test@example.com"], { cwd: repoDir }); 21 | await execFileAsync("git", ["config", "user.name", "Kiri Tester"], { cwd: repoDir }); 22 | await execFileAsync("git", ["config", "commit.gpgsign", "false"], { cwd: repoDir }); 23 | 24 | for (const [relativePath, content] of Object.entries(files)) { 25 | const fullPath = join(repoDir, relativePath); 26 | await mkdir(dirname(fullPath), { recursive: true }); 27 | await writeFile(fullPath, content); 28 | } 29 | 30 | await execFileAsync("git", ["add", "."], { cwd: repoDir }); 31 | await execFileAsync("git", ["commit", "-m", "init"], { cwd: repoDir }); 32 | 33 | // Normalize path to match what runIndexer stores (Fix #2 compatibility) 34 | const normalizedPath = realpathSync.native(repoDir); 35 | 36 | return { 37 | path: normalizedPath, 38 | cleanup: async () => { 39 | await rm(repoDir, { recursive: true, force: true }); 40 | }, 41 | }; 42 | } 43 | -------------------------------------------------------------------------------- /src/server/services/index.ts: -------------------------------------------------------------------------------- 1 | import { DuckDBClient } from "../../shared/duckdb.js"; 2 | import { 3 | loadDomainTerms, 4 | type DomainExpansion, 5 | type DomainTermsDictionary, 6 | } from "../domain-terms.js"; 7 | import { loadStopWords, type StopWordsService } from "../stop-words.js"; 8 | 9 | import { RepoRepository } from "./repo-repository.js"; 10 | import { RepoResolver } from "./repo-resolver.js"; 11 | 12 | /** 13 | * ServerServices 14 | * 15 | * サーバー全体で共有されるサービスの集合。 16 | * リクエスト間で共有され、単一のインスタンスを持つ。 17 | */ 18 | export interface ServerServices { 19 | repoRepository: RepoRepository; 20 | repoResolver: RepoResolver; 21 | domainTerms: DomainTermsDictionary; 22 | stopWords: StopWordsService; 23 | } 24 | 25 | /** 26 | * createServerServices 27 | * 28 | * サーバーサービスを初期化して返す。 29 | * サーバー起動時に一度だけ呼び出される。 30 | * 31 | * @param db - DuckDBクライアント 32 | * @returns 初期化されたサービス群 33 | */ 34 | export function createServerServices(db: DuckDBClient): ServerServices { 35 | const repoRepository = new RepoRepository(db); 36 | const repoResolver = new RepoResolver(repoRepository); 37 | const domainTerms = 38 | process.env.KIRI_ENABLE_DOMAIN_TERMS === "1" 39 | ? 
loadDomainTerms() 40 | : (new (class EmptyDict { 41 | expandFromText(): DomainExpansion { 42 | return { matched: [], aliases: [], fileHints: [] }; 43 | } 44 | expandCandidates(): DomainExpansion { 45 | return { matched: [], aliases: [], fileHints: [] }; 46 | } 47 | })() as unknown as DomainTermsDictionary); 48 | const stopWords = loadStopWords(); 49 | 50 | return { 51 | repoRepository, 52 | repoResolver, 53 | domainTerms, 54 | stopWords, 55 | }; 56 | } 57 | -------------------------------------------------------------------------------- /tests/eval/metrics.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from "vitest"; 2 | 3 | import { 4 | evaluateRetrieval, 5 | precisionAtK, 6 | timeToFirstUseful, 7 | type LatencyEvent, 8 | } from "../../src/eval/metrics.js"; 9 | 10 | describe("evaluation metrics", () => { 11 | it("computes precision at K", () => { 12 | const retrieved = ["a", "b", "c", "d"]; 13 | const relevant = new Set(["a", "c", "x"]); 14 | expect(precisionAtK(retrieved, relevant, 3)).toBeCloseTo(2 / 3); 15 | expect(precisionAtK(retrieved, relevant, 1)).toBe(1); 16 | expect(precisionAtK([], relevant, 5)).toBe(0); 17 | }); 18 | 19 | it("computes time to first useful result in seconds", () => { 20 | const events: LatencyEvent[] = [ 21 | { timestampMs: 40, relevant: false }, 22 | { timestampMs: 120, relevant: true }, 23 | { timestampMs: 200, relevant: false }, 24 | ]; 25 | expect(timeToFirstUseful(events)).toBeCloseTo(0.08, 2); 26 | }); 27 | 28 | it("returns infinity when no useful result arrives", () => { 29 | const events: LatencyEvent[] = [ 30 | { timestampMs: 10, relevant: false }, 31 | { timestampMs: 20, relevant: false }, 32 | ]; 33 | expect(timeToFirstUseful(events)).toBe(Number.POSITIVE_INFINITY); 34 | }); 35 | 36 | it("evaluates combined retrieval metrics", () => { 37 | const metrics = evaluateRetrieval({ 38 | items: [ 39 | { id: "x", timestampMs: 0 }, 40 | { id: "y", timestampMs: 150 }, 41 | { id: "z", timestampMs: 320 }, 42 | ], 43 | relevant: new Set(["y", "z"]), 44 | k: 2, 45 | }); 46 | expect(metrics.precisionAtK).toBeCloseTo(1 / 2); 47 | expect(metrics.timeToFirstUseful).toBeCloseTo(0.15, 2); 48 | }); 49 | }); 50 | -------------------------------------------------------------------------------- /scripts/diag/cleanup-hints.ts: -------------------------------------------------------------------------------- 1 | import process from "node:process"; 2 | 3 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 4 | 5 | interface CleanupArgs { 6 | databasePath: string; 7 | days: number; 8 | } 9 | 10 | function parseArgs(argv: string[]): CleanupArgs { 11 | let databasePath: string | undefined; 12 | let days = 14; 13 | for (let i = 0; i < argv.length; i += 1) { 14 | const arg = argv[i]; 15 | if (arg === "--db") { 16 | databasePath = argv[++i]; 17 | } else if (arg === "--days") { 18 | const value = Number.parseInt(argv[++i] ?? "", 10); 19 | if (Number.isFinite(value) && value >= 1) { 20 | days = value; 21 | } 22 | } 23 | } 24 | if (!databasePath) { 25 | throw new Error("cleanup-hints requires --db <path>"); 26 | } 27 | return { databasePath, days }; 28 | } 29 | 30 | export async function main(argv = process.argv.slice(2)): Promise<void> { 31 | const args = parseArgs(argv); 32 | const db = await DuckDBClient.connect({ 33 | databasePath: args.databasePath, 34 | ensureDirectory: false, 35 | }); 36 | try { 37 | await db.run( 38 | ` 39 | DELETE FROM hint_expansion 40 | WHERE created_at < NOW() - INTERVAL ? 
DAY 41 | `, 42 | [args.days] 43 | ); 44 | console.info(`Deleted hint_expansion rows older than ${args.days} day(s).`); 45 | } finally { 46 | await db.close(); 47 | } 48 | } 49 | 50 | const executedDirectly = 51 | typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1]; 52 | 53 | if (executedDirectly) { 54 | main().catch((error) => { 55 | console.error("Failed to cleanup hint_expansion table:", error); 56 | process.exitCode = 1; 57 | }); 58 | } 59 | -------------------------------------------------------------------------------- /tests/shared/masker.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from "vitest"; 2 | 3 | import { maskValue } from "../../src/shared/security/masker.js"; 4 | 5 | describe("maskValue", () => { 6 | it("ignores prefix tokens embedded within normal words", () => { 7 | const input = "lambda/ask-agent/src/handler.ts"; 8 | const result = maskValue(input, { tokens: ["sk-"] }); 9 | 10 | expect(result.masked).toBe(input); 11 | expect(result.applied).toBe(0); 12 | }); 13 | 14 | it("still avoids masking when the prefix is mid-word even if the suffix is long", () => { 15 | const input = "foomask-1234567890ABCDE"; 16 | const result = maskValue(input, { tokens: ["sk-"] }); 17 | 18 | expect(result.masked).toBe(input); 19 | expect(result.applied).toBe(0); 20 | }); 21 | 22 | it("masks prefix tokens when preceded by a boundary and followed by a long suffix", () => { 23 | const secret = "sk-1234567890ABCDE"; 24 | const result = maskValue(`API key: ${secret}`, { tokens: ["sk-"] }); 25 | 26 | expect(result.masked).toBe("API key: ***"); 27 | expect(result.applied).toBe(1); 28 | }); 29 | 30 | it("respects skipKeys to keep structural fields like path untouched", () => { 31 | const payload = { path: "sk-1234567890ABCDE" }; 32 | const result = maskValue(payload, { tokens: ["sk-"], skipKeys: ["path"] }); 33 | 34 | expect(result.masked).toEqual(payload); 35 | expect(result.applied).toBe(0); 36 | }); 37 | 38 | it("continues masking literal tokens that do not look like prefixes", () => { 39 | const pem = "-----BEGIN RSA PRIVATE KEY-----"; 40 | const result = maskValue(pem, { tokens: ["-----BEGIN"] }); 41 | 42 | expect(result.masked).toBe("*** RSA PRIVATE KEY-----"); 43 | expect(result.applied).toBe(1); 44 | }); 45 | }); 46 | -------------------------------------------------------------------------------- /docs/user/path-penalties.md: -------------------------------------------------------------------------------- 1 | --- 2 | doc_id: "GUIDE-002" 3 | title: "Path Penalties User Guide" 4 | category: "configuration" 5 | tags: 6 | - path-penalty 7 | - boosting 8 | - config 9 | service: "kiri" 10 | --- 11 | 12 | # Path Penalties User Guide 13 | 14 | ## Goal 15 | 16 | Tune ranking for repository-specific directories by applying multiplicative path multipliers. 17 | 18 | ## Recommended Setup (YAML) 19 | 20 | Create `.kiri/config.yaml` at the repo root: 21 | 22 | ```yaml 23 | path_penalties: 24 | - prefix: src/ 25 | multiplier: 1.4 # boost src 26 | - prefix: external/ 27 | multiplier: 0.3 # down-weight external 28 | ``` 29 | 30 | ## Environment Variable Override 31 | 32 | - Format: `KIRI_PATH_PENALTY_<encoded-prefix>=<multiplier>` 33 | - Encode `/` as `__`. Example: `KIRI_PATH_PENALTY_src__api__=0.8` 34 | 35 | ## Precedence (last wins) 36 | 37 | `boost_profile` defaults < Environment variables < `.kiri/config.yaml` 38 | 39 | ## Normalization Rules 40 | 41 | - Convert `\` to `/` (POSIX style). 
42 | - Strip drive letters and reject paths containing `..` (repo-relative only). 43 | - Preserve trailing `/` only if provided. 44 | 45 | ## Application & Refresh 46 | 47 | - Loaded at server/daemon start. 48 | - After editing `.kiri/config.yaml` or env vars, **restart the server/daemon** to refresh (values are cached in-process). 49 | 50 | ## Quick Verification 51 | 52 | 1. Add `.kiri/config.yaml` 53 | 2. Restart server/daemon (`kiri --repo . --db .kiri/index.duckdb --watch`, etc.) 54 | 3. Run `context_bundle` or `files_search` and confirm ranking shifts for the targeted paths. 55 | 56 | ## Troubleshooting 57 | 58 | - Error: `Path penalty prefix "..." must not contain ".."` → remove `..`; only repo-relative paths are allowed. 59 | - Not applied: restart the process to drop caches after config changes. 60 | -------------------------------------------------------------------------------- /scripts/assay/plugins/context-coverage-metric.ts: -------------------------------------------------------------------------------- 1 | interface Logger { 2 | info(message: string): void; 3 | } 4 | 5 | interface MetricPluginContext { 6 | logger: Logger; 7 | } 8 | 9 | type MetricDirection = "higher" | "lower" | "neutral"; 10 | 11 | interface MetricPlugin { 12 | kind: "metric"; 13 | meta: { 14 | name: string; 15 | version: string; 16 | assay: string; 17 | description: string; 18 | }; 19 | init(context: MetricPluginContext): void | Promise<void>; 20 | activate(): Promise<{ 21 | id: string; 22 | calculate(): Promise<Record<string, number>>; 23 | metadata: { 24 | displayName: string; 25 | description: string; 26 | direction: MetricDirection; 27 | }; 28 | }>; 29 | dispose(): void | Promise<void>; 30 | } 31 | 32 | export const contextCoverageMetric: MetricPlugin = { 33 | kind: "metric", 34 | meta: { 35 | name: "context-coverage", 36 | version: "1.0.0", 37 | assay: ">=0.1.1", 38 | description: "Measures how well retrieved context covers expected paths", 39 | }, 40 | async init(context: MetricPluginContext) { 41 | context.logger.info("Context coverage metric plugin initialized"); 42 | }, 43 | async activate() { 44 | return { 45 | id: "context-coverage", 46 | async calculate() { 47 | // Placeholder calculation. Real implementation would inspect query-level results. 
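// (Illustrative sketch, not in the original file.) A query-aware version could
// derive the metrics from hypothetical per-query inputs, e.g.:
//   const hit = expectedPaths.filter((p) => retrievedPaths.includes(p)).length;
//   const contextCoverage = expectedPaths.length > 0 ? hit / expectedPaths.length : 1;
// `expectedPaths` / `retrievedPaths` are assumed names, not part of the plugin API.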
48 | return { 49 | contextCoverage: 0.85, 50 | pathOverlap: 0.72, 51 | } as Record<string, number>; 52 | }, 53 | metadata: { 54 | displayName: "Context Coverage", 55 | description: "Fixed sample metric for regression checks", 56 | direction: "higher", 57 | }, 58 | }; 59 | }, 60 | async dispose() { 61 | // Nothing to dispose 62 | }, 63 | }; 64 | 65 | export default contextCoverageMetric; 66 | -------------------------------------------------------------------------------- /eslint.config.js: -------------------------------------------------------------------------------- 1 | import eslint from "@eslint/js"; 2 | import tseslint from "@typescript-eslint/eslint-plugin"; 3 | import tsparser from "@typescript-eslint/parser"; 4 | import importPlugin from "eslint-plugin-import"; 5 | import prettierConfig from "eslint-config-prettier"; 6 | 7 | export default [ 8 | { 9 | ignores: ["dist/**", "var/**"] 10 | }, 11 | eslint.configs.recommended, 12 | { 13 | files: ["**/*.{ts,tsx}"], 14 | languageOptions: { 15 | parser: tsparser, 16 | parserOptions: { 17 | project: false, 18 | ecmaVersion: 2022, 19 | sourceType: "module" 20 | }, 21 | globals: { 22 | console: "readonly", 23 | process: "readonly", 24 | Buffer: "readonly", 25 | URL: "readonly", 26 | __dirname: "readonly", 27 | __filename: "readonly", 28 | exports: "writable", 29 | module: "writable", 30 | require: "readonly", 31 | // Node.js globals 32 | AbortController: "readonly", 33 | AbortSignal: "readonly", 34 | setTimeout: "readonly", 35 | clearTimeout: "readonly", 36 | setInterval: "readonly", 37 | clearInterval: "readonly", 38 | // TypeScript/Node.js namespace 39 | NodeJS: "readonly" 40 | } 41 | }, 42 | plugins: { 43 | "@typescript-eslint": tseslint, 44 | import: importPlugin 45 | }, 46 | rules: { 47 | ...tseslint.configs.recommended.rules, 48 | "import/order": [ 49 | "warn", 50 | { 51 | alphabetize: { order: "asc", caseInsensitive: true }, 52 | "newlines-between": "always" 53 | } 54 | ], 55 | "@typescript-eslint/no-unused-vars": [ 56 | "error", 57 | { argsIgnorePattern: "^_", varsIgnorePattern: "^_" } 58 | ] 59 | } 60 | }, 61 | prettierConfig 62 | ]; 63 | -------------------------------------------------------------------------------- /tests/shared/adaptive-k.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "vitest"; 2 | 3 | import { getAdaptiveK } from "../../src/shared/adaptive-k.js"; 4 | import { validateAdaptiveKConfig } from "../../src/shared/config-validate-adaptive-k.js"; 5 | 6 | const baseConfig = { 7 | enabled: true, 8 | allowedSet: [5, 10, 20], 9 | kMin: 3, 10 | kMax: 50, 11 | kMap: { 12 | bugfix: 5, 13 | integration: 5, 14 | testfail: 20, 15 | performance: 20, 16 | }, 17 | kDefault: 10, 18 | kWhenDisabled: 10, 19 | }; 20 | 21 | describe("getAdaptiveK", () => { 22 | it("returns category mapping when enabled", () => { 23 | expect(getAdaptiveK("bugfix", baseConfig)).toBe(5); 24 | expect(getAdaptiveK("testfail", baseConfig)).toBe(20); 25 | expect(getAdaptiveK("performance", baseConfig)).toBe(20); 26 | expect(getAdaptiveK("unknown", baseConfig)).toBe(10); 27 | }); 28 | 29 | it("returns kWhenDisabled when disabled", () => { 30 | const cfg = { ...baseConfig, enabled: false, kWhenDisabled: 7 }; 31 | expect(getAdaptiveK("bugfix", cfg)).toBe(7); 32 | expect(getAdaptiveK(undefined, cfg)).toBe(7); 33 | }); 34 | }); 35 | 36 | describe("validateAdaptiveKConfig", () => { 37 | it("accepts valid config", () => { 38 | expect(() => validateAdaptiveKConfig(baseConfig)).not.toThrow(); 39 | }); 
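  // (Illustrative addition, commented out so the original spec is unchanged.)
  // validateAdaptiveKConfig also rejects kMap entries outside allowedSet when enabled;
  // a sketch of that case using the baseConfig above:
  // it("rejects kMap values not in allowedSet when enabled", () => {
  //   const cfg = { ...baseConfig, kMap: { ...baseConfig.kMap, bugfix: 7 } };
  //   expect(() => validateAdaptiveKConfig(cfg)).toThrow();
  // });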
40 | 41 | it("rejects allowedSet outside range", () => { 42 | const cfg = { ...baseConfig, allowedSet: [2] }; 43 | expect(() => validateAdaptiveKConfig(cfg)).toThrow(); 44 | }); 45 | 46 | it("rejects empty allowedSet", () => { 47 | const cfg = { ...baseConfig, allowedSet: [] }; 48 | expect(() => validateAdaptiveKConfig(cfg)).toThrow(); 49 | }); 50 | 51 | it("rejects kDefault not in allowedSet when enabled", () => { 52 | const cfg = { ...baseConfig, kDefault: 15 }; 53 | expect(() => validateAdaptiveKConfig(cfg)).toThrow(); 54 | }); 55 | }); 56 | -------------------------------------------------------------------------------- /docs/formal/AdaptiveK.tla: -------------------------------------------------------------------------------- 1 | ---- MODULE AdaptiveK ---- 2 | EXTENDS Naturals, Sequences 3 | 4 | CONSTANTS 5 | CATEGORIES, 6 | ALLOWED_SET, 7 | K_MIN, 8 | K_MAX, 9 | K_BUGFIX, 10 | K_INTEGRATION, 11 | K_TESTFAIL, 12 | K_PERFORMANCE, 13 | K_DEFAULT 14 | 15 | (*************************************************************************) 16 | (* Adaptive K mapping for search queries *) 17 | (*************************************************************************) 18 | AdaptiveK(cat) == 19 | IF cat = "bugfix" THEN K_BUGFIX 20 | ELSE IF cat = "integration" THEN K_INTEGRATION 21 | ELSE IF cat = "testfail" THEN K_TESTFAIL 22 | ELSE IF cat = "performance" THEN K_PERFORMANCE 23 | ELSE K_DEFAULT 24 | 25 | VARIABLES queryCat, k 26 | 27 | Init == 28 | /\ queryCat \in CATEGORIES 29 | /\ k = AdaptiveK(queryCat) 30 | 31 | Next == 32 | \E newCat \in CATEGORIES: 33 | /\ queryCat' = newCat 34 | /\ k' = AdaptiveK(newCat) 35 | 36 | InvAllowedSet == k \in ALLOWED_SET 37 | 38 | InvRange == K_MIN <= k /\ k <= K_MAX 39 | 40 | InvBugfixPrecision == (queryCat = "bugfix") => k = K_BUGFIX 41 | 42 | InvIntegrationPrecision == (queryCat = "integration") => k = K_INTEGRATION 43 | 44 | InvTestfailRecall == (queryCat = "testfail") => k = K_TESTFAIL 45 | 46 | InvPerformanceRecall == (queryCat = "performance") => k = K_PERFORMANCE 47 | 48 | InvGenericBalance == (queryCat \notin {"bugfix", "integration", "testfail", "performance"}) => k = K_DEFAULT 49 | 50 | Spec == Init /\ [][Next]_<<queryCat, k>> 51 | 52 | THEOREM InvAllowedSetInvariant == Spec => []InvAllowedSet 53 | THEOREM InvRangeInvariant == Spec => []InvRange 54 | THEOREM InvBugfixPrecisionInvariant == Spec => []InvBugfixPrecision 55 | THEOREM InvIntegrationPrecisionInvariant == Spec => []InvIntegrationPrecision 56 | THEOREM InvTestfailRecallInvariant == Spec => []InvTestfailRecall 57 | THEOREM InvPerformanceRecallInvariant == Spec => []InvPerformanceRecall 58 | THEOREM InvGenericBalanceInvariant == Spec => []InvGenericBalance 59 | 60 | ==== 61 | -------------------------------------------------------------------------------- /src/indexer/dart/config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Dart Analysis Server 設定のパース・バリデーション 3 | * 4 | * Fix #17-21 (Codex Critical Review Round 3): 5 | * 環境変数の異常値(NaN、空文字、負数)からデフォルト値へのフォールバックを一元管理 6 | */ 7 | 8 | /** 9 | * MAX_CLIENTS環境変数をパースして検証 10 | * 11 | * @returns 検証済みの MAX_CLIENTS 値(デフォルト: 8) 12 | */ 13 | export function parseMaxClients(): number { 14 | const parsed = Number.parseInt(process.env.DART_ANALYSIS_MAX_CLIENTS ?? "8", 10); 15 | return Number.isFinite(parsed) && parsed > 0 ? 
parsed : 8; 16 | } 17 | 18 | /** 19 | * CLIENT_WAIT_MS環境変数をパースして検証 20 | * 21 | * @returns 検証済みの CLIENT_WAIT_MS 値(デフォルト: 10000) 22 | */ 23 | export function parseClientWaitMs(): number { 24 | const parsed = Number.parseInt(process.env.DART_ANALYSIS_CLIENT_WAIT_MS ?? "10000", 10); 25 | return Number.isFinite(parsed) && parsed > 0 ? parsed : 10000; 26 | } 27 | 28 | /** 29 | * IDLE_TTL_MS環境変数をパースして検証 30 | * 31 | * 0 = TTL無効化(無期限保持、LRUで管理) 32 | * 負数 = デフォルト値にフォールバック 33 | * 34 | * @returns 検証済みの IDLE_TTL_MS 値(デフォルト: 60000) 35 | */ 36 | export function parseIdleTtlMs(): number { 37 | const envValue = process.env.DART_ANALYSIS_IDLE_MS ?? "60000"; 38 | 39 | // Empty string check first (Number("") returns 0, which is ambiguous) 40 | if (envValue.trim() === "") { 41 | return 60000; 42 | } 43 | 44 | const parsed = Number(envValue); 45 | 46 | // NaN check (handles non-numeric values) 47 | if (!Number.isFinite(parsed)) { 48 | return 60000; 49 | } 50 | 51 | // Explicit 0 is valid (TTL disabled) 52 | if (parsed === 0) { 53 | return 0; 54 | } 55 | 56 | // Negative values fallback to default 57 | return parsed > 0 ? parsed : 60000; 58 | } 59 | 60 | /** 61 | * FILE_QUEUE_TTL_MS環境変数をパースして検証 62 | * 63 | * 最小値: 1000ms(メモリリーク防止) 64 | * 65 | * @returns 検証済みの FILE_QUEUE_TTL_MS 値(デフォルト: 30000、最小: 1000) 66 | */ 67 | export function parseFileQueueTtlMs(): number { 68 | const parsed = Number.parseInt(process.env.DART_FILE_QUEUE_TTL_MS ?? "30000", 10); 69 | return Number.isFinite(parsed) ? Math.max(1000, parsed) : 30000; 70 | } 71 | -------------------------------------------------------------------------------- /src/server/observability/tracing.ts: -------------------------------------------------------------------------------- 1 | import type { Span } from "@opentelemetry/api"; 2 | 3 | let tracerPromise: Promise<{ 4 | startActiveSpan: <T>(name: string, fn: (span: Span) => Promise<T>) => Promise<T>; 5 | } | null>; 6 | 7 | async function loadTracer() { 8 | if (!tracerPromise) { 9 | tracerPromise = (async () => { 10 | try { 11 | const api = await import("@opentelemetry/api"); 12 | return { 13 | startActiveSpan: async <T>(name: string, fn: (span: Span) => Promise<T>) => { 14 | return await api.trace.getTracer("kiri").startActiveSpan(name, async (span) => { 15 | try { 16 | return await fn(span); 17 | } finally { 18 | span.end(); 19 | } 20 | }); 21 | }, 22 | }; 23 | } catch { 24 | return null; 25 | } 26 | })(); 27 | } 28 | return tracerPromise; 29 | } 30 | 31 | export async function withSpan<T>(name: string, fn: (span: Span | null) => Promise<T>): Promise<T> { 32 | const tracer = await loadTracer(); 33 | if (!tracer) { 34 | return await fn(null); 35 | } 36 | const result = await tracer.startActiveSpan(name, async (span) => { 37 | try { 38 | const output = await fn(span); 39 | // 成功時はOKステータスを設定 40 | span.setStatus({ code: 1 }); // SpanStatusCode.OK 41 | return output; 42 | } catch (error) { 43 | // エラー詳細をキャプチャしてデバッグ可能にする 44 | span.setStatus({ 45 | code: 2, // SpanStatusCode.ERROR 46 | message: error instanceof Error ? 
error.message : String(error), 47 | }); 48 | span.setAttribute("error", true); 49 | if (error instanceof Error) { 50 | span.setAttribute("error.type", error.constructor.name); 51 | span.setAttribute("error.message", error.message); 52 | if (error.stack) { 53 | span.setAttribute("error.stack", error.stack); 54 | } 55 | } 56 | throw error; // エラーを記録後に再スロー 57 | } 58 | }); 59 | return result as T; 60 | } 61 | -------------------------------------------------------------------------------- /tests/indexer/dart/__fixtures__/outline-nested.json: -------------------------------------------------------------------------------- 1 | { 2 | "kind": "COMPILATION_UNIT", 3 | "offset": 0, 4 | "length": 250, 5 | "element": { 6 | "kind": "COMPILATION_UNIT", 7 | "name": "nested.dart" 8 | }, 9 | "children": [ 10 | { 11 | "kind": "CLASS", 12 | "offset": 0, 13 | "length": 200, 14 | "element": { 15 | "kind": "CLASS", 16 | "name": "Outer" 17 | }, 18 | "children": [ 19 | { 20 | "kind": "METHOD", 21 | "offset": 20, 22 | "length": 80, 23 | "element": { 24 | "kind": "METHOD", 25 | "name": "process", 26 | "parameters": "()", 27 | "returnType": "void" 28 | } 29 | }, 30 | { 31 | "kind": "GETTER", 32 | "offset": 105, 33 | "length": 45, 34 | "element": { 35 | "kind": "GETTER", 36 | "name": "value", 37 | "returnType": "int" 38 | } 39 | }, 40 | { 41 | "kind": "SETTER", 42 | "offset": 155, 43 | "length": 40, 44 | "element": { 45 | "kind": "SETTER", 46 | "name": "value", 47 | "parameters": "(int v)" 48 | } 49 | } 50 | ] 51 | }, 52 | { 53 | "kind": "ENUM", 54 | "offset": 205, 55 | "length": 45, 56 | "element": { 57 | "kind": "ENUM", 58 | "name": "Status" 59 | }, 60 | "children": [ 61 | { 62 | "kind": "ENUM_CONSTANT", 63 | "offset": 220, 64 | "length": 10, 65 | "element": { 66 | "kind": "ENUM_CONSTANT", 67 | "name": "active" 68 | } 69 | }, 70 | { 71 | "kind": "ENUM_CONSTANT", 72 | "offset": 235, 73 | "length": 10, 74 | "element": { 75 | "kind": "ENUM_CONSTANT", 76 | "name": "inactive" 77 | } 78 | } 79 | ] 80 | } 81 | ] 82 | } 83 | -------------------------------------------------------------------------------- /tests/server/domain-terms.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, rm, writeFile } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { afterEach, describe, expect, it } from "vitest"; 6 | 7 | import { DomainTermsDictionary, loadDomainTerms } from "../../src/server/domain-terms.js"; 8 | 9 | const cleanups: Array<() => Promise<void>> = []; 10 | 11 | afterEach(async () => { 12 | for (const cleanup of cleanups.splice(0, cleanups.length)) { 13 | await cleanup(); 14 | } 15 | }); 16 | 17 | describe("domain terms dictionary", () => { 18 | it("normalizes camelCase and hyphenated aliases and returns path hints", async () => { 19 | const dir = await mkdtemp(join(tmpdir(), "domain-terms-")); 20 | cleanups.push(async () => rm(dir, { recursive: true, force: true })); 21 | const configPath = join(dir, "domain-terms.yml"); 22 | await writeFile( 23 | configPath, 24 | `stats:\n - mann-whitney-u:\n aliases:\n - mannWhitneyU\n - Wilcoxon_Test\n files:\n - src/stats/mann.ts\n - rank-biserial:\n aliases:\n - rankBiserialEffect\n - effect-size\n files:\n - src/stats/rank-biserial.ts\n` 25 | ); 26 | 27 | const dictionary = loadDomainTerms({ configPath }); 28 | const expansion = dictionary.expandFromText("wilcoxon-test and effect-size"); 29 | 30 | expect(expansion.aliases).toEqual( 
expect.arrayContaining(["mann-whitney-u", "rank-biserial", "effect-size", "wilcoxon-test"]) 32 | ); 33 | expect(expansion.fileHints).toEqual([ 34 | { path: "src/stats/mann.ts", source: "mann-whitney-u" }, 35 | { path: "src/stats/rank-biserial.ts", source: "rank-biserial" }, 36 | ]); 37 | }); 38 | 39 | it("returns an empty expansion when no terms match", () => { 40 | const dictionary = new DomainTermsDictionary([]); 41 | const expansion = dictionary.expandFromText("unrelated phrase"); 42 | expect(expansion.aliases).toHaveLength(0); 43 | expect(expansion.fileHints).toHaveLength(0); 44 | }); 45 | }); 46 | -------------------------------------------------------------------------------- /tests/client/cli.security.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, readFile, rm } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; 6 | 7 | import { main } from "../../src/client/cli.js"; 8 | import { loadSecurityConfig } from "../../src/shared/security/config.js"; 9 | 10 | describe("CLI security verify", () => { 11 | let tempDir: string; 12 | const defaultDbName = "index.duckdb"; 13 | const expectedHash = loadSecurityConfig().hash; 14 | let infoSpy: ReturnType<typeof vi.spyOn>; 15 | let errorSpy: ReturnType<typeof vi.spyOn>; 16 | 17 | beforeEach(async () => { 18 | tempDir = await mkdtemp(join(tmpdir(), "kiri-cli-test-")); 19 | infoSpy = vi.spyOn(console, "info").mockImplementation(() => {}); 20 | errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); 21 | }); 22 | 23 | afterEach(async () => { 24 | infoSpy.mockRestore(); 25 | errorSpy.mockRestore(); 26 | await rm(tempDir, { recursive: true, force: true }); 27 | }); 28 | 29 | it("creates security.lock next to the database when --db is provided", async () => { 30 | const dbPath = join(tempDir, defaultDbName); 31 | const exitCode = main(["security", "verify", "--db", dbPath, "--write-lock"]); 32 | 33 | expect(exitCode).toBe(0); 34 | 35 | const lockPath = join(tempDir, "security.lock"); 36 | const lockContent = await readFile(lockPath, "utf-8"); 37 | expect(lockContent.trim()).toBe(expectedHash); 38 | }); 39 | 40 | it("honors --security-lock override", async () => { 41 | const dbPath = join(tempDir, defaultDbName); 42 | const customLock = join(tempDir, "locks", "custom.lock"); 43 | const exitCode = main([ 44 | "security", 45 | "verify", 46 | "--db", 47 | dbPath, 48 | "--security-lock", 49 | customLock, 50 | "--write-lock", 51 | ]); 52 | 53 | expect(exitCode).toBe(0); 54 | 55 | const lockContent = await readFile(customLock, "utf-8"); 56 | expect(lockContent.trim()).toBe(expectedHash); 57 | }); 58 | }); 59 | -------------------------------------------------------------------------------- /tests/indexer/dart/test-helpers.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Test helpers and mocks for Dart Analysis Server tests 3 | */ 4 | 5 | import { EventEmitter } from "node:events"; 6 | import { Readable, Writable } from "node:stream"; 7 | /** 8 | * Mock ChildProcess for Analysis Server tests 9 | */ 10 | export class MockChildProcess extends EventEmitter { 11 | stdin: Writable; 12 | stdout: Readable; 13 | stderr: Readable; 14 | killed = false; 15 | pid = 12345; 16 | 17 | constructor() { 18 | super(); 19 | this.stdin = new Writable({ 20 | write: (chunk, _encoding, callback) => { 21 | this.handleInput(chunk.toString()); 22 | callback(); 23 | }, 
24 | }); 25 | this.stdout = new Readable({ read() {} }); 26 | this.stderr = new Readable({ read() {} }); 27 | } 28 | 29 | /** 30 | * Handle input from test (requests sent to Analysis Server) 31 | */ 32 | private handleInput(_data: string): void { 33 | // テスト側でオーバーライド可能 34 | } 35 | 36 | /** 37 | * Simulate server sending a message 38 | */ 39 | sendMessage(message: object): void { 40 | const line = JSON.stringify(message) + "\n"; 41 | this.stdout.push(line); 42 | } 43 | 44 | /** 45 | * Simulate server error 46 | */ 47 | sendError(error: string): void { 48 | this.stderr.push(error + "\n"); 49 | } 50 | 51 | kill(signal?: NodeJS.Signals | number): boolean { 52 | this.killed = true; 53 | this.emit("exit", 0, signal); 54 | return true; 55 | } 56 | } 57 | 58 | /** 59 | * Create mock response for analysis.getOutline 60 | */ 61 | export function createMockOutlineResponse(outlineData: object): object { 62 | return { 63 | id: "1", 64 | result: { 65 | outline: outlineData, 66 | }, 67 | }; 68 | } 69 | 70 | /** 71 | * Create mock Dart SDK info 72 | */ 73 | export function createMockSdkInfo() { 74 | return { 75 | sdkPath: "/mock/dart-sdk", 76 | version: "3.2.0", 77 | analysisServerPath: "/mock/dart-sdk/bin/snapshots/analysis_server.dart.snapshot", 78 | dartExecutable: "/mock/dart-sdk/bin/dart", // Windows fix: absolute path for spawn 79 | }; 80 | } 81 | -------------------------------------------------------------------------------- /src/shared/config-validate-adaptive-k.ts: -------------------------------------------------------------------------------- 1 | import type { AdaptiveKConfig } from "./adaptive-k.js"; 2 | 3 | function assertFinite(value: number, name: string): void { 4 | if (!Number.isFinite(value)) { 5 | throw new Error(`${name} must be a finite number`); 6 | } 7 | } 8 | 9 | export function validateAdaptiveKConfig(config: AdaptiveKConfig): void { 10 | assertFinite(config.kMin, "adaptiveK.kMin"); 11 | assertFinite(config.kMax, "adaptiveK.kMax"); 12 | if (config.kMin < 0) { 13 | throw new Error("adaptiveK.kMin must be >= 0"); 14 | } 15 | if (config.kMax <= config.kMin) { 16 | throw new Error("adaptiveK.kMax must be greater than kMin"); 17 | } 18 | 19 | if (!Array.isArray(config.allowedSet) || config.allowedSet.length === 0) { 20 | throw new Error("adaptiveK.allowedSet must be a non-empty array"); 21 | } 22 | for (const value of config.allowedSet) { 23 | assertFinite(value, "adaptiveK.allowedSet value"); 24 | if (value < config.kMin || value > config.kMax) { 25 | throw new Error("adaptiveK.allowedSet values must be within [kMin, kMax]"); 26 | } 27 | } 28 | 29 | for (const [key, value] of Object.entries(config.kMap)) { 30 | assertFinite(value, `adaptiveK.kMap[${key}]`); 31 | if (config.enabled && !config.allowedSet.includes(value)) { 32 | throw new Error(`adaptiveK.kMap[${key}] must belong to allowedSet when adaptiveK is enabled`); 33 | } 34 | if (value < config.kMin || value > config.kMax) { 35 | throw new Error(`adaptiveK.kMap[${key}] must be within [kMin, kMax]`); 36 | } 37 | } 38 | 39 | const defaults = [config.kDefault, config.kWhenDisabled]; 40 | for (const [idx, value] of defaults.entries()) { 41 | assertFinite(value, idx === 0 ? 
"adaptiveK.kDefault" : "adaptiveK.kWhenDisabled"); 42 | if (value < config.kMin || value > config.kMax) { 43 | throw new Error("adaptiveK default values must be within [kMin, kMax]"); 44 | } 45 | } 46 | 47 | if (config.enabled && !config.allowedSet.includes(config.kDefault)) { 48 | throw new Error("adaptiveK.kDefault must belong to allowedSet when adaptiveK is enabled"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /scripts/diag.ts: -------------------------------------------------------------------------------- 1 | import { execSync } from "node:child_process"; 2 | import process from "node:process"; 3 | 4 | import { createDenylistFilter } from "../src/indexer/pipeline/filters/denylist.js"; 5 | 6 | import { checkHealth } from "./diag/health.js"; 7 | 8 | function run(command: string): string { 9 | return execSync(command, { encoding: "utf8" }).trim(); 10 | } 11 | 12 | export function collectDiagnostics(): Record { 13 | return { 14 | node: run("node --version"), 15 | pnpm: run("pnpm --version"), 16 | gitStatus: run("git status -sb"), 17 | }; 18 | } 19 | 20 | export function checkDenylist(repoRoot = process.cwd()): string[] { 21 | const filter = createDenylistFilter(repoRoot); 22 | return filter.diff(); 23 | } 24 | 25 | export async function main(argv = process.argv.slice(2)): Promise { 26 | const [command] = argv; 27 | try { 28 | switch (command) { 29 | case "check-denylist": { 30 | const diff = checkDenylist(); 31 | if (diff.length === 0) { 32 | console.info("Denylist matches .gitignore patterns."); 33 | return 0; 34 | } 35 | console.warn("Patterns present in .gitignore but missing from config/denylist.yml:"); 36 | for (const entry of diff) { 37 | console.warn(` - ${entry}`); 38 | } 39 | return 1; 40 | } 41 | case "health": { 42 | const report = await checkHealth(); 43 | console.info(JSON.stringify(report, null, 2)); 44 | return report.metricsReachable ? 
0 : 1; 45 | } 46 | case undefined: 47 | console.info(JSON.stringify(collectDiagnostics(), null, 2)); 48 | return 0; 49 | default: 50 | console.error(`Unknown diag command: ${command}`); 51 | return 1; 52 | } 53 | } catch (error) { 54 | console.error("診断コマンドの実行に失敗しました", error); 55 | return 1; 56 | } 57 | } 58 | 59 | const executedDirectly = 60 | typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1]; 61 | 62 | if (executedDirectly) { 63 | main().then((code) => { 64 | process.exitCode = code; 65 | }); 66 | } 67 | -------------------------------------------------------------------------------- /src/indexer/codeintel/dart/adapter.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Dart Language Analyzer Adapter 3 | * 4 | * 既存の Dart Analysis Server 実装をラップして LanguageAnalyzer インターフェースを提供。 5 | * Dart はLSPベースの Analysis Server を使用するため、他の tree-sitter ベースの 6 | * アナライザーとは異なるアーキテクチャを持つ。 7 | */ 8 | 9 | import { analyzeDartSource, cleanup } from "../../dart/analyze.js"; 10 | import type { LanguageAnalyzer, AnalysisContext, AnalysisResult } from "../types.js"; 11 | import { emptyResult } from "../types.js"; 12 | 13 | // 既存の Dart 解析実装をインポート 14 | // Note: analyzeDartSource と cleanup は src/indexer/dart/ にある既存実装を使用 15 | 16 | /** 17 | * Dart Language Analyzer 18 | * 19 | * LanguageAnalyzer インターフェースを実装し、 20 | * Dart Analysis Server を使用したシンボル抽出と依存関係解析を提供。 21 | * 22 | * 特徴: 23 | * - LSPベースの Analysis Server を使用 24 | * - プロセスプーリングによるリソース効率化 25 | * - 参照カウントとアイドルTTLによるライフサイクル管理 26 | * - workspaceRoot が必須 (Analysis Server は絶対パスを要求) 27 | */ 28 | export class DartAnalyzer implements LanguageAnalyzer { 29 | readonly language = "Dart"; 30 | 31 | async analyze(context: AnalysisContext): Promise<AnalysisResult> { 32 | const { pathInRepo, content, workspaceRoot } = context; 33 | 34 | // Dart Analysis Server は workspaceRoot を要求する 35 | if (!workspaceRoot) { 36 | console.warn( 37 | `[DartAnalyzer] workspaceRoot required for Dart analysis, skipping ${pathInRepo}` 38 | ); 39 | return emptyResult(); 40 | } 41 | 42 | // 既存の Dart 解析実装を呼び出し 43 | const result = await analyzeDartSource(pathInRepo, content, workspaceRoot); 44 | 45 | // exactOptionalPropertyTypes対応: errorはundefinedの場合は省略 46 | return { 47 | symbols: result.symbols, 48 | snippets: result.snippets, 49 | dependencies: result.dependencies, 50 | status: result.status, 51 | ...(result.error !== undefined && { error: result.error }), 52 | }; 53 | } 54 | 55 | /** 56 | * 全ての Dart Analysis Server プロセスをクリーンアップ 57 | * 58 | * このメソッドは LanguageRegistry.cleanup() から呼び出され、 59 | * プール内の全クライアントを適切に終了する 60 | */ 61 | async dispose(): Promise<void> { 62 | await cleanup(); 63 | } 64 | } 65 | 66 | /** 67 | * Dart アナライザーのファクトリ関数 68 | */ 69 | export function createDartAnalyzer(): DartAnalyzer { 70 | return new DartAnalyzer(); 71 | } 72 | -------------------------------------------------------------------------------- /tests/indexer/path-normalization.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, rm, symlink } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { afterEach, describe, expect, it } from "vitest"; 6 | 7 | import { runIndexer } from "../../src/indexer/cli.js"; 8 | import { clearAllQueues } from "../../src/indexer/queue.js"; 9 | import { ensureBaseSchema } from "../../src/indexer/schema.js"; 10 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 11 | import { createTempRepo } from 
"../helpers/test-repo.js"; 12 | 13 | describe("repo path normalization", () => { 14 | const cleanup: Array<() => Promise> = []; 15 | 16 | afterEach(async () => { 17 | while (cleanup.length > 0) { 18 | await cleanup.pop()!(); 19 | } 20 | clearAllQueues(); 21 | }); 22 | 23 | it("reuses legacy repo rows created with symlinked paths", async () => { 24 | const repo = await createTempRepo({ 25 | "src/app.ts": "export const answer = 42;\n", 26 | }); 27 | cleanup.push(repo.cleanup); 28 | 29 | const aliasDir = await mkdtemp(join(tmpdir(), "kiri-alias-")); 30 | const aliasPath = join(aliasDir, "link"); 31 | await symlink(repo.path, aliasPath); 32 | cleanup.push(async () => { 33 | await rm(aliasDir, { recursive: true, force: true }); 34 | }); 35 | 36 | const dbDir = await mkdtemp(join(tmpdir(), "kiri-path-db-")); 37 | const dbPath = join(dbDir, "index.duckdb"); 38 | cleanup.push(async () => { 39 | await rm(dbDir, { recursive: true, force: true }); 40 | }); 41 | 42 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 43 | await ensureBaseSchema(db); 44 | await db.run("INSERT INTO repo (id, root, default_branch) VALUES (1, ?, 'main')", [aliasPath]); 45 | await db.close(); 46 | 47 | await runIndexer({ repoRoot: aliasPath, databasePath: dbPath, full: true }); 48 | 49 | const verifyDb = await DuckDBClient.connect({ databasePath: dbPath }); 50 | const repoRows = await verifyDb.all<{ id: number; root: string }>("SELECT id, root FROM repo"); 51 | 52 | expect(repoRows).toHaveLength(1); 53 | expect(repoRows[0]?.id).toBe(1); 54 | expect(repoRows[0]?.root).toBe(repo.path); 55 | await verifyDb.close(); 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /src/server/services/repo-resolver.ts: -------------------------------------------------------------------------------- 1 | import { getRepoPathCandidates, normalizeRepoPath } from "../../shared/utils/path.js"; 2 | 3 | import { RepoRepository } from "./repo-repository.js"; 4 | 5 | /** 6 | * RepoNotFoundError 7 | * 8 | * リポジトリが見つからなかった場合のエラー 9 | */ 10 | export class RepoNotFoundError extends Error { 11 | constructor(message: string) { 12 | super(message); 13 | this.name = "RepoNotFoundError"; 14 | } 15 | } 16 | 17 | /** 18 | * RepoResolver 19 | * 20 | * リポジトリパスをデータベースIDに解決する責務を持つクラス。 21 | * パス正規化とDB検索を組み合わせ、エラー処理も担当する。 22 | */ 23 | export class RepoResolver { 24 | constructor(private repository: RepoRepository) {} 25 | 26 | /** 27 | * リポジトリのrootパスをデータベースIDに解決する。 28 | * 29 | * @param repoRoot - リポジトリのrootパス 30 | * @returns リポジトリID 31 | * @throws RepoNotFoundError リポジトリがインデックスされていない場合 32 | */ 33 | async resolveId(repoRoot: string): Promise { 34 | // repoテーブルの存在確認 35 | const tableExists = await this.repository.tableExists(); 36 | if (!tableExists) { 37 | throw new RepoNotFoundError( 38 | `Repository ${repoRoot} was not indexed. Run the indexer before starting the server.` 39 | ); 40 | } 41 | 42 | // パス候補と正規化パスを取得 43 | const candidates = getRepoPathCandidates(repoRoot); 44 | const normalized = candidates[0]; 45 | 46 | // exactOptionalPropertyTypes 対応: candidates[0] が undefined の場合はエラー 47 | if (!normalized) { 48 | throw new RepoNotFoundError( 49 | `Repository ${repoRoot} path normalization failed. 
Check path validity.` 50 | ); 51 | } 52 | 53 | // 高速パス: 直接検索を試みる 54 | let repo = await this.repository.findByPaths(candidates); 55 | 56 | // 低速パス: 正規化フォールバックを試みる 57 | if (!repo) { 58 | repo = await this.repository.findByNormalizedPath(normalized, normalizeRepoPath); 59 | 60 | if (!repo) { 61 | throw new RepoNotFoundError( 62 | `Repository ${repoRoot} was not indexed. Run the indexer before starting the server.` 63 | ); 64 | } 65 | 66 | // 次回の高速検索のために正規化パスに更新 67 | await this.repository.updateRoot(repo.id, normalized); 68 | } 69 | 70 | // パスが正規化されていない場合は更新 71 | if (repo.root !== normalized) { 72 | await this.repository.updateRoot(repo.id, normalized); 73 | } 74 | 75 | return repo.id; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/shared/utils/retry.ts: -------------------------------------------------------------------------------- 1 | export interface RetryOptions { 2 | maxAttempts: number; 3 | delayMs: number; 4 | jitterMs?: number; 5 | /** 最大遅延時間(ミリ秒)。指数バックオフの上限を設定 */ 6 | maxDelayMs?: number; 7 | /** バックオフ倍率。指定時は指数バックオフを使用(デフォルト: 1 = 線形) */ 8 | backoffMultiplier?: number; 9 | isRetriable?: (error: unknown) => boolean; 10 | } 11 | 12 | /** 13 | * 指数バックオフ付きの遅延時間を計算 14 | * 15 | * @param baseDelayMs - 基本遅延時間 16 | * @param attempt - 現在の試行回数(1から始まる) 17 | * @param backoffMultiplier - バックオフ倍率(デフォルト: 1 = 線形) 18 | * @param maxDelayMs - 最大遅延時間(上限) 19 | * @param jitterMs - ジッター(ランダムな追加遅延) 20 | * @returns 計算された遅延時間(ミリ秒) 21 | */ 22 | export function calculateBackoffDelay( 23 | baseDelayMs: number, 24 | attempt: number, 25 | backoffMultiplier: number = 1, 26 | maxDelayMs?: number, 27 | jitterMs: number = 0 28 | ): number { 29 | // 指数バックオフ: baseDelay * (multiplier ^ (attempt - 1)) 30 | // attempt=1 では baseDelay、attempt=2 では baseDelay * multiplier、... 31 | const exponentialDelay = baseDelayMs * Math.pow(backoffMultiplier, attempt - 1); 32 | 33 | // 最大遅延を適用 34 | const cappedDelay = 35 | maxDelayMs !== undefined ? Math.min(exponentialDelay, maxDelayMs) : exponentialDelay; 36 | 37 | // ジッターを追加 38 | const jitter = jitterMs > 0 ? Math.random() * jitterMs : 0; 39 | 40 | return cappedDelay + jitter; 41 | } 42 | 43 | export async function withRetry<T>( 44 | operation: () => Promise<T>, 45 | { 46 | maxAttempts, 47 | delayMs, 48 | jitterMs = 0, 49 | maxDelayMs, 50 | backoffMultiplier = 1, 51 | isRetriable, 52 | }: RetryOptions 53 | ): Promise<T> { 54 | if (maxAttempts < 1) { 55 | throw new Error("maxAttempts must be >= 1"); 56 | } 57 | 58 | for (let attempt = 1; attempt <= maxAttempts; attempt++) { 59 | try { 60 | return await operation(); 61 | } catch (error) { 62 | const shouldRetry = attempt < maxAttempts && (isRetriable ? 
isRetriable(error) : true); 63 | if (!shouldRetry) { 64 | throw error; 65 | } 66 | const delay = calculateBackoffDelay( 67 | delayMs, 68 | attempt, 69 | backoffMultiplier, 70 | maxDelayMs, 71 | jitterMs 72 | ); 73 | await new Promise((resolve) => setTimeout(resolve, delay)); 74 | } 75 | } 76 | 77 | throw new Error("Retry attempts exhausted"); 78 | } 79 | -------------------------------------------------------------------------------- /docs/operations.md: -------------------------------------------------------------------------------- 1 | --- 2 | doc_id: "RUN-002" 3 | title: "運用と可観測性" 4 | category: "operations" 5 | tags: 6 | - operations 7 | - observability 8 | - docs 9 | service: "kiri" 10 | --- 11 | 12 | # 運用と可観測性 13 | 14 | > 関連: [KIRI 概要](./overview.md#kiri-概要) / [運用 Runbook](./runbook.md#運用-runbook) / [セキュリティとコンプライアンス](./security.md#セキュリティとコンプライアンス) 15 | 16 | ## SLO とメトリクス 17 | 18 | - **SLO** 19 | - `context_bundle` P95 を **≤ 1000ms** に維持する。 20 | - 初回フル索引を **1M 行/10分** 以内で完了させる。 21 | - 差分取り込みを **5 分バッチ / P95 1 分未満** に抑える。 22 | - **モニタリング対象** 23 | - Indexer: 走査数/分、blob 重複率、再構築時間。 24 | - MCP: ツール別レイテンシ/エラー率、Degrade 発動回数。 25 | - 検索品質: P@k、NDCG、TTFU、Token 削減率。 26 | 27 | ## 失敗モードと Degrade 戦略 28 | 29 | - **拡張ロード失敗(FTS/VSS)**: 文字列検索+構造近接のみで結果を返却し、VSS を無効化。 30 | - **DuckDB ロック衝突**: 読み込みは許可し、書き込みはステージング→バッチに統一して再試行。 31 | - **依存解決不能**: `dst_kind="package"` として保持し、パス近接の重み付けを増やす。 32 | - **blame 計算コスト増**: 差分のみ逐次更新し、巨大ファイルは週次フル再計算に限定する。 33 | 34 | ## ヒント辞書の運用 35 | 36 | ユーザーが `artifacts.hints` を指定しない抽象クエリでも確実に実装へ到達させるため、`hint_expansion` / `hint_dictionary` を定期的に更新する。 37 | 38 | 1. **ログ計測**: 影響を調べたい間だけ `KIRI_HINT_LOG=1` を付けて `pnpm run eval:golden` などを実行すると、`hint_expansion` テーブルに展開履歴が残る(通常運用では書き込みコストを避けるため OFF)。 39 | 2. **ログ確認**: `pnpm exec tsx scripts/diag/dump-hints.ts --db var/index.duckdb --repo . --limit 200` で直近の展開を確認できる。 40 | 3. **辞書再構築**: 新しいログを基に `pnpm exec tsx scripts/diag/build-hint-dictionary.ts --db var/index.duckdb --repo . --min-freq 2` を実行すると、頻出ヒント→パスのマッピングが更新される。 41 | 4. **TTL 清掃**: 長期間のログは `pnpm exec tsx scripts/diag/cleanup-hints.ts --db var/index.duckdb --days 14` で破棄し、DuckDB サイズ膨張を防ぐ。 42 | 43 | > メモ: 辞書は substring ヒントを入力に path ヒントへ昇格させるため、`context_bundle` は `dictionary:hint:` という why タグを返す。Metadata だけでヒットしないドキュメントが増えてきたら辞書の更新を検討する。 44 | 45 | ## npm 公開フロー 46 | 47 | 1. `pnpm install` → `pnpm run check` を実行し、Lint とテストがすべて成功することを確認する。 48 | 2. `package.json` の `version` を SemVer に従って更新し、変更点を `CHANGELOG.md`(追記が必要な場合)へ反映する。 49 | 3. `pnpm run build` を実行して `dist/` を再生成し、`git status` で不要な生成物が残っていないか検証する。 50 | 4. `npm login`(初回のみ)後、公開アクセスの場合は `pnpm publish --access public` を実行する。プライベート公開の場合は `--access restricted` を指定する。 51 | 5. 公開完了後にタグ付け `git tag v<version>` → `git push origin --tags` を行い、GitHub Release と npm のバージョンを同期させる。 52 | 6. 
パッケージをグローバルインストールして動作確認 (`npm install -g kiri-mcp-server` → `kiri-server --help`) を行い、問題があれば速やかに `npm deprecate` とパッチリリースで対処する。 53 | -------------------------------------------------------------------------------- /src/eval/metrics.ts: -------------------------------------------------------------------------------- 1 | export interface RetrievalEvent { 2 | id: string; 3 | timestampMs: number; 4 | } 5 | 6 | export function precisionAtK( 7 | retrievedIds: string[], 8 | relevantIds: Iterable<string>, 9 | k: number 10 | ): number { 11 | if (k <= 0 || retrievedIds.length === 0) { 12 | return 0; 13 | } 14 | const relevantSet = new Set(relevantIds); 15 | if (relevantSet.size === 0) { 16 | return 0; 17 | } 18 | const limit = Math.min(k, retrievedIds.length); 19 | let hits = 0; 20 | for (let index = 0; index < limit; index += 1) { 21 | const id = retrievedIds[index]; 22 | if (id !== undefined && relevantSet.has(id)) { 23 | hits += 1; 24 | } 25 | } 26 | return hits / limit; 27 | } 28 | 29 | export interface LatencyEvent { 30 | timestampMs: number; 31 | relevant: boolean; 32 | } 33 | 34 | export function timeToFirstUseful( 35 | events: LatencyEvent[], 36 | options: { startTimestampMs?: number } = {} 37 | ): number { 38 | if (events.length === 0) { 39 | return Number.POSITIVE_INFINITY; 40 | } 41 | const sorted = [...events].sort((a, b) => a.timestampMs - b.timestampMs); 42 | const baseline = 43 | typeof options.startTimestampMs === "number" 44 | ? options.startTimestampMs 45 | : (sorted[0]?.timestampMs ?? 0); 46 | for (const event of sorted) { 47 | if (event.relevant) { 48 | const deltaMs = event.timestampMs - baseline; 49 | return Math.max(0, deltaMs) / 1000; 50 | } 51 | } 52 | return Number.POSITIVE_INFINITY; 53 | } 54 | 55 | export interface EvaluateRetrievalOptions { 56 | items: RetrievalEvent[]; 57 | relevant: Iterable<string>; 58 | k: number; 59 | } 60 | 61 | export interface RetrievalMetrics { 62 | precisionAtK: number; 63 | timeToFirstUseful: number; 64 | } 65 | 66 | export function evaluateRetrieval(options: EvaluateRetrievalOptions): RetrievalMetrics { 67 | const { items, relevant, k } = options; 68 | const ids = items.map((item) => item.id); 69 | const precision = precisionAtK(ids, relevant, k); 70 | const relevantSet = new Set(relevant); 71 | const latencyEvents: LatencyEvent[] = items.map((item) => ({ 72 | timestampMs: item.timestampMs, 73 | relevant: relevantSet.has(item.id), 74 | })); 75 | const ttff = timeToFirstUseful(latencyEvents); 76 | return { precisionAtK: precision, timeToFirstUseful: ttff }; 77 | } 78 | -------------------------------------------------------------------------------- /src/server/context.ts: -------------------------------------------------------------------------------- 1 | import { DuckDBClient } from "../shared/duckdb.js"; 2 | 3 | import { WarningManager } from "./rpc.js"; 4 | import { ServerServices } from "./services/index.js"; 5 | 6 | export interface FtsStatusCache { 7 | ready: boolean; 8 | schemaExists: boolean; 9 | anyDirty: boolean; 10 | lastChecked: number; 11 | } 12 | 13 | export interface TableAvailability { 14 | hasMetadataTables: boolean; 15 | hasLinkTable: boolean; 16 | hasHintLog: boolean; 17 | hasHintDictionary: boolean; 18 | hasGraphMetrics: boolean; // graph_metrics テーブル(グラフレイヤースコアリング用) 19 | hasCochange: boolean; // cochange テーブル(コチェンジスコアリング用) 20 | } 21 | 22 | export interface ServerContext { 23 | db: DuckDBClient; 24 | repoId: number; 25 | services: ServerServices; 26 | databasePath?: string; 27 | repoPath?: string; 28 | features?: { 29 | fts?: boolean; // 
FTS拡張が利用可能かどうか 30 | }; 31 | ftsStatusCache?: FtsStatusCache; 32 | tableAvailability: TableAvailability; 33 | warningManager: WarningManager; 34 | } 35 | 36 | /** 37 | * createServerContext 38 | * 39 | * ServerContext を生成するファクトリ関数。 40 | * テストや複数のエントリポイントで共通の初期化パスを提供する。 41 | * 42 | * @param options - コンテキスト初期化オプション 43 | * @returns 初期化された ServerContext 44 | */ 45 | export function createServerContext(options: { 46 | db: DuckDBClient; 47 | repoId: number; 48 | services: ServerServices; 49 | databasePath?: string; 50 | repoPath?: string; 51 | features?: { fts?: boolean }; 52 | ftsStatusCache?: FtsStatusCache; 53 | tableAvailability: TableAvailability; 54 | warningManager: WarningManager; 55 | }): ServerContext { 56 | const context: ServerContext = { 57 | db: options.db, 58 | repoId: options.repoId, 59 | services: options.services, 60 | tableAvailability: options.tableAvailability, 61 | warningManager: options.warningManager, 62 | }; 63 | 64 | if (options.databasePath !== undefined) { 65 | context.databasePath = options.databasePath; 66 | } 67 | 68 | if (options.repoPath !== undefined) { 69 | context.repoPath = options.repoPath; 70 | } 71 | 72 | // exactOptionalPropertyTypes: true を満たすため、undefined の場合は代入しない 73 | if (options.features !== undefined) { 74 | context.features = options.features; 75 | } 76 | 77 | if (options.ftsStatusCache !== undefined) { 78 | context.ftsStatusCache = options.ftsStatusCache; 79 | } 80 | 81 | return context; 82 | } 83 | -------------------------------------------------------------------------------- /docs/formal/README.md: -------------------------------------------------------------------------------- 1 | # Issue 106 Formal Reference 2 | 3 | ## Modules 4 | 5 | | Module | Purpose | 6 | | ------------------------- | ----------------------------------------------------------------------- | 7 | | `PathPenaltyMerge.tla` | Models YAML/ENV/Profile path penalty merging, invariants、liveness。 | 8 | | `PathPenaltyEncoding.tla` | 別モジュール化したプレフィックスの encode / decode / 正規化ロジック。 | 9 | | `AdaptiveK.tla` | クエリカテゴリに応じた K 値決定ロジックとフォールバックの安全性を検証。 | 10 | 11 | ## 仕様→実装対応 12 | 13 | | 仕様要素 | 実装参照 | 説明 | 14 | | ------------------------------ | ----------------------------------------------------- | ---------------------------------------------------------------------------------------- | 15 | | `EntryPool` / `Merge` | `src/server/config-loader.ts`(予定) | YAML・環境変数を読み、pathMultipliers 配列を構築するフェーズ。 | 16 | | `WinningMultiplier` | `src/server/handlers.ts#getPathMultiplier` | 最長一致+優先度選択。条件が合わない場合は 1.0 になり `applyFileTypeMultipliers` に反映。 | 17 | | `BuildSortedList` | `config-loader` 内 `sortByPrefixLength`(実装タスク) | prefix 長で降順にソートし、`PrefixOrder` のような構造を生成。 | 18 | | `Encode/Decode` (別モジュール) | env 変数パーサ (`process.env.KIRI_PATH_PENALTY_*`) | `/ ↔ __` 変換や Windows 正規化の契約を定義。 | 19 | 20 | ## TLC プロファイルと戦略 21 | 22 | - `PathPenaltyMerge.cfg` … `(MaxSeqLenYaml, MaxSeqLenEnv, MaxSeqLenProfile) = (2,2,2)` の高速検証。CI はこちらを使用。 23 | - `PathPenaltyMerge-max3.cfg` … YAML を 3 まで広げた再検証。状態数が増えるためローカル/ナイトリー専用。 24 | - さらに探索を広げる場合は: 25 | - **bounded exploration**: `.cfg` に別途 `MaxSeqLen*` を追加し、1段階ずつ増やす。 26 | - **symmetry reduction**: prefix が実装上同型な場合は `Symmetry` 宣言で削減。 27 | - **random sampling**: `EntryPool` をサブセットに差し替えた派生 `.cfg` を複数用意し、seed を変えて走らせる。 28 | 29 | ## Windows / 相対パス 30 | 31 | `PathPenaltyEncoding.tla` の `OSNormalize` は「実装が `\` や `.` / `..` を resolve 済みである」ことを前提とする演算子。実装では `path.posix` + 追加ルールを適用し、仕様側は `ASSUME PrefixesAreNormalized` で整合を取る。 32 | 33 | ## 今後の TODO 34 | 35 | - `config-loader` 実装が固まったら `EntryPool` や 
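For reference, a sketch of invoking the `createServerContext` factory above at startup. `db`, `services`, and `tableAvailability` are assumed to be constructed earlier (the fts-status-cache spec later in this dump builds the same shape by hand):

```typescript
// Optional members are supplied only when actually known; the factory then
// skips them entirely, which is what exactOptionalPropertyTypes: true demands.
const context = createServerContext({
  db,
  repoId: 1,
  services,
  tableAvailability,
  warningManager: new WarningManager(),
  features: { fts: true },
});
```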
`PrefixOrder` を自動生成するテストコードを追加。 36 | - env エンコードの完全性は Alloy で検証する予定(構造整合性に向いた題材)。 37 | - 活性・公平性など、より非自明な性質も随時追加する。 38 | -------------------------------------------------------------------------------- /tests/server/fts-status-cache.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, rm } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { describe, expect, it } from "vitest"; 6 | 7 | import { ensureBaseSchema, ensureRepoMetaColumns } from "../../src/indexer/schema.js"; 8 | import { type ServerContext } from "../../src/server/context.js"; 9 | import { checkTableAvailability, filesSearch } from "../../src/server/handlers.js"; 10 | import { WarningManager } from "../../src/server/rpc.js"; 11 | import { createServerServices } from "../../src/server/services/index.js"; 12 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 13 | 14 | describe("FTS status cache invalidation", () => { 15 | it("degrades immediately when repo becomes dirty after caching", async () => { 16 | const tempDir = await mkdtemp(join(tmpdir(), "kiri-fts-cache-")); 17 | const dbPath = join(tempDir, "index.duckdb"); 18 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 19 | try { 20 | await ensureBaseSchema(db); 21 | await ensureRepoMetaColumns(db); 22 | 23 | await db.run( 24 | `INSERT INTO repo (id, root, default_branch, fts_dirty, fts_status, fts_last_indexed_at) 25 | VALUES (1, ?, 'main', false, 'clean', CURRENT_TIMESTAMP)`, 26 | [tempDir] 27 | ); 28 | 29 | await db.run(`INSERT INTO blob (hash, content) VALUES ('blob-1', 'hello world content');`); 30 | await db.run( 31 | `INSERT INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime) 32 | VALUES (1, 'src/hello.ts', 'blob-1', '.ts', 'typescript', FALSE, CURRENT_TIMESTAMP)` 33 | ); 34 | 35 | const tableAvailability = await checkTableAvailability(db); 36 | const warningManager = new WarningManager(); 37 | const context: ServerContext = { 38 | db, 39 | repoId: 1, 40 | services: createServerServices(db), 41 | features: { fts: true }, 42 | ftsStatusCache: { 43 | ready: true, 44 | schemaExists: true, 45 | anyDirty: false, 46 | lastChecked: Date.now(), 47 | }, 48 | tableAvailability, 49 | warningManager, 50 | }; 51 | 52 | await db.run(`UPDATE repo SET fts_dirty = true, fts_status = 'rebuilding' WHERE id = 1`); 53 | 54 | const results = await filesSearch(context, { query: "hello" }); 55 | 56 | expect(results.length).toBeGreaterThan(0); 57 | expect(context.features?.fts).toBe(false); 58 | expect(context.ftsStatusCache?.ready).toBe(false); 59 | } finally { 60 | await db.close(); 61 | await rm(tempDir, { recursive: true, force: true }); 62 | } 63 | }); 64 | }); 65 | -------------------------------------------------------------------------------- /tests/eval/results/2025-11-17-docs-plain.md: -------------------------------------------------------------------------------- 1 | # KIRI Golden Set Evaluation - 2025-11-17 2 | 3 | **Version**: 0.10.0 (9e59843) 4 | **Dataset**: v2025-11-docs-plain 5 | **K**: 10 6 | 7 | ## Overall Metrics 8 | 9 | | Metric | Value | Threshold | Status | 10 | | ------------------- | ------ | --------- | ------ | 11 | | P@10 | 0.286 | ≥0.7 | ❌ | 12 | | Avg TFFU | 1ms | ≤1000ms | ✅ | 13 | | Avg Token Savings | 89.5% | ≥40% | ✅ | 14 | | Avg Hint Coverage | 14.1% | - | - | 15 | | Metadata Pass | 50.0% | ≥100% | ❌ | 16 | | Inbound Link Pass | 50.0% | ≥100% | ❌ | 17 | | Avg Bundle Tokens | 42462 | - | - | 18 | | 
Avg Baseline Tokens | 457694 | - | - | 19 | | Total Queries | 22 | - | - | 20 | | Successful | 17 | - | - | 21 | | Failed | 5 | - | - | 22 | 23 | ## By Category 24 | 25 | | Category | P@10 | Avg TFFU | Avg Token Savings | Avg Hint Coverage | Metadata Pass | Inbound Pass | Count | 26 | | ---------- | ----- | -------- | ----------------- | ----------------- | ------------- | ------------ | ----- | 27 | | editor | 0.200 | 0ms | 98.1% | 30.0% | N/A | N/A | 2 | 28 | | api | 0.133 | 0ms | 87.9% | 16.7% | N/A | N/A | 3 | 29 | | debug | 0.150 | 0ms | 81.0% | 15.0% | N/A | N/A | 2 | 30 | | feature | 0.125 | 0ms | 98.6% | 17.5% | N/A | N/A | 4 | 31 | | infra | 0.200 | 0ms | 62.7% | 30.0% | N/A | N/A | 1 | 32 | | docs | 0.900 | 4ms | 88.5% | 0.0% | 100.0% | 100.0% | 5 | 33 | | docs-plain | 0.000 | 0ms | N/A | N/A | 0.0% | 0.0% | 5 | 34 | 35 | ## Category Δ Metrics 36 | 37 | | Baseline | Variant | ΔP@10 | ΔMetadata Pass | ΔInbound Pass | 38 | | -------- | ---------- | ------ | -------------- | ------------- | 39 | | docs | docs-plain | -0.900 | -1.000 | -1.000 | 40 | 41 | ## Failed Queries 42 | 43 | | ID | Status | Error | 44 | | ---------------------------------------- | ------ | ----- | 45 | | kiri-docs-runbook-degrade-plain | empty | N/A | 46 | | kiri-docs-security-windows-plain | empty | N/A | 47 | | kiri-docs-search-ranking-profile-plain | empty | N/A | 48 | | kiri-docs-operations-observability-plain | empty | N/A | 49 | | kiri-docs-testing-golden-plain | empty | N/A | 50 | -------------------------------------------------------------------------------- /tests/helpers/fixtures.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Test fixture factories for database records 3 | * These factories help maintain consistency when schema changes occur 4 | */ 5 | 6 | /** 7 | * File table record structure 8 | */ 9 | export interface FileRecord { 10 | repo_id: number; 11 | path: string; 12 | blob_hash: string; 13 | ext: string; 14 | lang: string; 15 | is_binary: boolean; 16 | mtime: string; 17 | [key: string]: string | number | boolean; // Index signature for buildInsertStatement compatibility 18 | } 19 | 20 | /** 21 | * Document metadata record structure 22 | */ 23 | export interface DocumentMetadataRecord { 24 | repo_id: number; 25 | path: string; 26 | source: string; 27 | data: string; 28 | [key: string]: string | number; // Index signature for buildInsertStatement compatibility 29 | } 30 | 31 | /** 32 | * Create a file record with default values 33 | * 34 | * @param repoId Repository ID 35 | * @param overrides Partial overrides for default values 36 | * @returns FileRecord with defaults applied 37 | */ 38 | export function createFileRecord( 39 | repoId: number, 40 | overrides: Partial> = {} 41 | ): FileRecord { 42 | return { 43 | repo_id: repoId, 44 | path: "docs/README.md", 45 | blob_hash: "test-hash", 46 | ext: ".md", 47 | lang: "markdown", 48 | is_binary: false, 49 | mtime: "2024-01-01T00:00:00.000Z", 50 | ...overrides, 51 | }; 52 | } 53 | 54 | /** 55 | * Create a document metadata record with default values 56 | * 57 | * @param repoId Repository ID 58 | * @param overrides Partial overrides for default values 59 | * @returns DocumentMetadataRecord with defaults applied 60 | */ 61 | export function createDocumentMetadataRecord( 62 | repoId: number, 63 | overrides: Partial> = {} 64 | ): DocumentMetadataRecord { 65 | return { 66 | repo_id: repoId, 67 | path: "docs/README.md", 68 | source: "front_matter", 69 | data: JSON.stringify({ title: "Test" }), 70 | 
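A usage sketch tying these factories to the `buildInsertStatement` helper defined just below; `db` is an assumed `DuckDBClient` from the test helpers:

```typescript
const record = createFileRecord(1, { path: "src/app.ts", ext: ".ts", lang: "typescript" });
const { sql, values } = buildInsertStatement("file", record);
// sql === "INSERT INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime)
//          VALUES (?, ?, ?, ?, ?, ?, ?)"
await db.run(sql, values);
```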
...overrides, 71 | }; 72 | } 73 | 74 | /** 75 | * Helper to build parameterized INSERT statement from a record object 76 | * 77 | * @param tableName Name of the table 78 | * @param record Record object with column names as keys 79 | * @returns Object with SQL string and values array 80 | */ 81 | export function buildInsertStatement>( 82 | tableName: string, 83 | record: T 84 | ): { sql: string; values: unknown[] } { 85 | const columns = Object.keys(record); 86 | const placeholders = columns.map(() => "?").join(", "); 87 | const sql = `INSERT INTO ${tableName} (${columns.join(", ")}) VALUES (${placeholders})`; 88 | const values = Object.values(record); 89 | 90 | return { sql, values }; 91 | } 92 | -------------------------------------------------------------------------------- /docs/runbook.md: -------------------------------------------------------------------------------- 1 | --- 2 | doc_id: "RUN-001" 3 | title: "運用 Runbook" 4 | category: "operations" 5 | tags: 6 | - operations 7 | - observability 8 | - degrade 9 | - docs 10 | service: "kiri" 11 | --- 12 | 13 | # 運用 Runbook 14 | 15 | > 関連: [KIRI 概要](./overview.md#kiri-概要) / [検索とランキング](./search-ranking.md#検索とランキング) / [セキュリティガイド](./security.md#セキュリティとコンプライアンス) 16 | 17 | ## Degrade 発生時の復旧手順 18 | 19 | 1. `/metrics` を確認し、`kiri_http_requests_total` の伸びと 503 レスポンスが増えていないかを確認する。 20 | 2. 監査ログ (`var/audit/*.json`) を開き、直近の `degrade` イベントと対象リポジトリを確認する。 21 | 3. `pnpm exec tsx src/client/cli.ts security verify --db ` を実行し、セキュリティロックに改ざんがないことを確認する。 22 | 4. DuckDB/VSS プロセスのヘルスチェックを行い、必要であれば再起動する。 23 | 5. 復旧が完了したらサーバーを `--allow-degrade` なしで再起動する。復旧完了後に `/metrics` のレスポンスから `degrade: true` 24 | が含まれないことを確認する。 25 | 26 | ## 観測項目 27 | 28 | ### Prometheus メトリクス(`/metrics` エンドポイント) 29 | 30 | サーバーは `http://localhost:8765/metrics` で Prometheus 形式のメトリクスを公開する。以下の指標を監視する: 31 | 32 | - **`kiri_http_requests_total`**: 処理された JSON-RPC リクエストの総数 33 | - 異常: 急激な減少(サービス停止の可能性)または異常な増加(攻撃の可能性) 34 | - **`kiri_mask_applied_total`**: 機密データのマスキング件数 35 | - 異常: 急増した場合は流出を疑い監査を実施する 36 | - **`kiri_denylist_hits_total`**: インデックス除外の件数 37 | - 異常: 設定値と `.gitignore` の差分がないか `pnpm exec tsx scripts/diag.ts check-denylist` で確認する 38 | - **`kiri_request_duration_ms`**: JSON-RPC リクエストの累積処理時間(ミリ秒) 39 | - 異常: 急増時は DuckDB/VSS の負荷を調査する 40 | 41 | **推奨アラート設定**: 42 | 43 | - リクエスト処理時間の平均が 1000ms を超える場合 44 | - マスキング件数が通常の 10倍を超える場合 45 | - Denylist ヒット率が 50% を超える場合(設定ミスの可能性) 46 | 47 | ### OpenTelemetry トレーシング 48 | 49 | OpenTelemetry が利用可能な環境では、各リクエストにスパンが記録され、以下の情報が含まれる: 50 | 51 | - **成功時**: span status code = 1 (OK) 52 | - **失敗時**: span status code = 2 (ERROR)、エラー属性(`error.type`, `error.message`, `error.stack`) 53 | 54 | トレースデータは Jaeger、Zipkin、Datadog などの APM ツールで可視化できる。 55 | 56 | **トラブルシューティング**: 57 | 58 | 1. エラースパンでフィルタリングして失敗オペレーションを特定 59 | 2. `error.stack` 属性からスタックトレースを確認 60 | 3. スパンの継続時間から性能ボトルネックを発見 61 | 62 | ## メタデータスキーマ移行(2025-11-15) 63 | 64 | DuckDB に `document_metadata` / `document_metadata_kv` / `markdown_link` テーブルが追加された。既存 DB をアップグレードするには以下の手順を実行する: 65 | 66 | 1. 作業ディレクトリをクリーンにし、`pnpm install && pnpm run build` で最新バイナリを生成する。 67 | 2. 対象リポジトリで `pnpm exec kiri index --full --repo --db ` を実行し、全ファイルを再取り込みする。完了後 `repo` テーブルの `indexed_at` を確認して最新化されていることを確認。 68 | 3. 
`pnpm run check` を実行し、検索テスト(`files_search`/`context_bundle`)が metadata フィルタ付きで成功することを確認する。 69 | 70 | ### ロールバック 71 | 72 | 万一メタデータテーブルが原因で問題が発生した場合は以下でロールバックできる: 73 | 74 | ``` 75 | duckdb 76 | > DROP TABLE IF EXISTS markdown_link; 77 | > DROP TABLE IF EXISTS document_metadata_kv; 78 | > DROP TABLE IF EXISTS document_metadata; 79 | ``` 80 | 81 | ロールバック後に旧バージョンのバイナリで `pnpm exec kiri index --full …` を再実行すると従来のスキーマに戻せる。再移行時は上記手順を繰り返す。 82 | -------------------------------------------------------------------------------- /tests/server/degrade.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, mkdtemp, writeFile } from "node:fs/promises"; 2 | import { join } from "node:path"; 3 | 4 | import { afterEach, describe, expect, it } from "vitest"; 5 | 6 | import { DegradeController } from "../../src/server/fallbacks/degradeController.js"; 7 | 8 | const tempDirs: string[] = []; 9 | 10 | afterEach(async () => { 11 | const fs = await import("node:fs/promises"); 12 | for (const dir of tempDirs.splice(0)) { 13 | await fs.rm(dir, { recursive: true, force: true }); 14 | } 15 | }); 16 | 17 | describe("DegradeController", () => { 18 | it("enters degrade mode on failure and provides fallback search", async () => { 19 | const dir = await mkdtemp("degrade-test-"); 20 | tempDirs.push(dir); 21 | await mkdir(join(dir, "src")); 22 | await writeFile(join(dir, "src", "file.ts"), "export const value = 42;\nconsole.log(value);\n"); 23 | 24 | const controller = new DegradeController(dir); 25 | await expect( 26 | controller.withResource(async () => { 27 | throw new Error("duckdb down"); 28 | }, "duckdb") 29 | ).rejects.toThrow(); 30 | 31 | expect(controller.current.active).toBe(true); 32 | const results = controller.search("console value"); 33 | expect(results[0]?.path).toContain("src/file.ts"); 34 | }); 35 | 36 | it("does not enter degrade mode for user-facing tool errors", async () => { 37 | const dir = await mkdtemp("degrade-test-"); 38 | tempDirs.push(dir); 39 | const controller = new DegradeController(dir); 40 | 41 | await expect( 42 | controller.withResource(async () => { 43 | throw new Error( 44 | "Requested snippet file was not indexed. Re-run the indexer or choose another path." 
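As these specs show, `DegradeController.withResource` treats infrastructure failures (label `"duckdb"`) as triggers for degrade mode while letting user-facing tool errors pass through untouched. A usage sketch with `controller`, `db`, and `repoId` assumed in scope:

```typescript
// If the query throws, the controller flips into degrade mode and subsequent
// reads can fall back to controller.search(), which scans the worktree directly.
const rows = await controller.withResource(
  () => db.all("SELECT path FROM file WHERE repo_id = ?", [repoId]),
  "duckdb"
);
```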
45 | ); 46 | }, "duckdb:snippets_get") 47 | ).rejects.toThrow(); 48 | 49 | expect(controller.current.active).toBe(false); 50 | }); 51 | 52 | it("skips binary files when performing fallback search", async () => { 53 | const dir = await mkdtemp("degrade-test-binary-"); 54 | tempDirs.push(dir); 55 | await mkdir(join(dir, "bin")); 56 | await writeFile(join(dir, "bin", "binary.dat"), Buffer.from([0, 1, 2, 3])); 57 | 58 | const controller = new DegradeController(dir); 59 | const results = controller.search("0 1"); 60 | expect(results).toHaveLength(0); 61 | }); 62 | 63 | it("skips files larger than the preview threshold", async () => { 64 | const dir = await mkdtemp("degrade-test-large-"); 65 | tempDirs.push(dir); 66 | await mkdir(join(dir, "large")); 67 | const largeContent = "x".repeat(512 * 1024); 68 | await writeFile(join(dir, "large", "huge.txt"), largeContent); 69 | 70 | const controller = new DegradeController(dir); 71 | const results = controller.search("x"); 72 | expect(results).toHaveLength(0); 73 | }); 74 | }); 75 | -------------------------------------------------------------------------------- /tests/server/config-loader.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { describe, expect, it, afterEach } from "vitest"; 6 | 7 | import type { PathMultiplier } from "../../src/server/boost-profiles.js"; 8 | import { loadPathPenalties } from "../../src/server/config-loader.js"; 9 | 10 | describe("config-loader: path penalties", () => { 11 | const originalCwd = process.cwd(); 12 | const createdEnv: string[] = []; 13 | let tempDir: string | null = null; 14 | 15 | afterEach(async () => { 16 | for (const key of createdEnv.splice(0)) { 17 | delete process.env[key]; 18 | } 19 | if (tempDir) { 20 | await rm(tempDir, { recursive: true, force: true }); 21 | tempDir = null; 22 | } 23 | process.chdir(originalCwd); 24 | }); 25 | 26 | it("merges profile < env < YAML with longest-prefix ordering", async () => { 27 | tempDir = await mkdtemp(join(tmpdir(), "kiri-config-")); 28 | const configDir = join(tempDir, ".kiri"); 29 | await mkdir(configDir, { recursive: true }); 30 | await writeFile( 31 | join(configDir, "config.yaml"), 32 | `path_penalties: 33 | - prefix: src/ 34 | multiplier: 2.0 35 | - prefix: external/ 36 | multiplier: 0.25 37 | ` 38 | ); 39 | 40 | const envKey = "KIRI_PATH_PENALTY_src__api__"; 41 | process.env[envKey] = "0.5"; 42 | createdEnv.push(envKey); 43 | 44 | process.chdir(tempDir); 45 | 46 | const base: PathMultiplier[] = [ 47 | { prefix: "src/", multiplier: 1.0 }, 48 | { prefix: "external/", multiplier: 0.9 }, 49 | ]; 50 | 51 | const merged = loadPathPenalties(base); 52 | 53 | expect(merged).toEqual([ 54 | { prefix: "external/", multiplier: 0.25 }, // YAML overrides profile 55 | { prefix: "src/api/", multiplier: 0.5 }, // env adds nested prefix 56 | { prefix: "src/", multiplier: 2.0 }, // YAML overrides profile 57 | ]); 58 | }); 59 | 60 | it("normalizes prefixes and rejects invalid env multipliers", async () => { 61 | tempDir = await mkdtemp(join(tmpdir(), "kiri-config-")); 62 | const configDir = join(tempDir, ".kiri"); 63 | await mkdir(configDir, { recursive: true }); 64 | await writeFile( 65 | join(configDir, "config.yaml"), 66 | `path_penalties: 67 | - prefix: src\\\\feature\\\\ 68 | multiplier: 1.1 69 | ` 70 | ); 71 | 72 | process.chdir(tempDir); 73 | 74 | const normalized = 
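The env-var merge in this spec relies on the `/ ↔ __` key encoding described in `docs/formal/README.md`. A sketch of the assumed decoding (the real parser belongs to `src/server/config-loader.ts`):

```typescript
// "KIRI_PATH_PENALTY_src__api__" with value "0.5" → { prefix: "src/api/", multiplier: 0.5 }
function decodePenaltyEnv(key: string, value: string): { prefix: string; multiplier: number } {
  const prefix = key.replace("KIRI_PATH_PENALTY_", "").replace(/__/g, "/");
  const multiplier = Number.parseFloat(value);
  if (!Number.isFinite(multiplier)) {
    // Matches the /Invalid multiplier/ expectation asserted in this spec.
    throw new Error(`Invalid multiplier: ${value}`);
  }
  return { prefix, multiplier };
}
```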
loadPathPenalties(); 75 | expect(normalized).toEqual([{ prefix: "src/feature/", multiplier: 1.1 }]); 76 | 77 | const badEnvKey = "KIRI_PATH_PENALTY_external__"; 78 | process.env[badEnvKey] = "not-a-number"; 79 | createdEnv.push(badEnvKey); 80 | 81 | expect(() => loadPathPenalties()).toThrow(/Invalid multiplier/); 82 | }); 83 | }); 84 | -------------------------------------------------------------------------------- /tests/server/resolve-repo.spec.ts: -------------------------------------------------------------------------------- 1 | import { mkdtemp, rm, symlink } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { afterEach, describe, expect, it } from "vitest"; 6 | 7 | import { ensureBaseSchema } from "../../src/indexer/schema.js"; 8 | import { resolveRepoId } from "../../src/server/handlers.js"; 9 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 10 | import { createTempRepo } from "../helpers/test-repo.js"; 11 | 12 | describe("resolveRepoId legacy compatibility", () => { 13 | const cleanup: Array<() => Promise> = []; 14 | 15 | afterEach(async () => { 16 | while (cleanup.length > 0) { 17 | const dispose = cleanup.pop(); 18 | if (dispose) { 19 | await dispose(); 20 | } 21 | } 22 | }); 23 | 24 | async function setupLegacyRepo() { 25 | const repo = await createTempRepo({ 26 | "src/lib.ts": "export const version = '1.0.0';\n", 27 | }); 28 | cleanup.push(repo.cleanup); 29 | 30 | const aliasDir = await mkdtemp(join(tmpdir(), "kiri-resolve-alias-")); 31 | const aliasPath = join(aliasDir, "worktree"); 32 | await symlink(repo.path, aliasPath); 33 | cleanup.push(async () => { 34 | await rm(aliasDir, { recursive: true, force: true }); 35 | }); 36 | 37 | const dbDir = await mkdtemp(join(tmpdir(), "kiri-resolve-db-")); 38 | const dbPath = join(dbDir, "index.duckdb"); 39 | cleanup.push(async () => { 40 | await rm(dbDir, { recursive: true, force: true }); 41 | }); 42 | 43 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 44 | await ensureBaseSchema(db); 45 | await db.run("INSERT INTO repo (id, root, default_branch) VALUES (7, ?, 'main')", [aliasPath]); 46 | await db.close(); 47 | 48 | return { repo, aliasPath, dbPath }; 49 | } 50 | 51 | it("updates repo rows when called with alias path", async () => { 52 | const { repo, aliasPath, dbPath } = await setupLegacyRepo(); 53 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 54 | 55 | const repoId = await resolveRepoId(db, aliasPath); 56 | expect(repoId).toBe(7); 57 | 58 | const rows = await db.all<{ root: string }>("SELECT root FROM repo WHERE id = 7"); 59 | expect(rows[0]?.root).toBe(repo.path); 60 | 61 | await db.close(); 62 | }); 63 | 64 | it("recovers legacy rows when only the normalized path is provided", async () => { 65 | const { repo, dbPath } = await setupLegacyRepo(); 66 | const db = await DuckDBClient.connect({ databasePath: dbPath }); 67 | 68 | const repoId = await resolveRepoId(db, repo.path); 69 | expect(repoId).toBe(7); 70 | 71 | const rows = await db.all<{ root: string }>("SELECT root FROM repo WHERE id = 7"); 72 | expect(rows[0]?.root).toBe(repo.path); 73 | 74 | await db.close(); 75 | }); 76 | }); 77 | -------------------------------------------------------------------------------- /src/indexer/codeintel.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Code Intelligence API 3 | * 4 | * 言語アナライザーシステムの公開APIを提供するファサード。 5 | * 実際の解析処理は codeintel/ モジュールの LanguageRegistry に委譲。 6 | * 7 | * 
使用例: 8 | * ```typescript 9 | * import { analyzeSource } from './codeintel.js'; 10 | * 11 | * const result = await analyzeSource('src/index.ts', 'TypeScript', content, fileSet); 12 | * ``` 13 | */ 14 | 15 | // 型とユーティリティをre-export 16 | export type { 17 | SymbolRecord, 18 | SnippetRecord, 19 | DependencyRecord, 20 | AnalysisContext, 21 | AnalysisResult, 22 | } from "./codeintel/types.js"; 23 | 24 | export { buildFallbackSnippet } from "./codeintel/utils.js"; 25 | 26 | // Registry と Analyzer をインポート 27 | import { createDartAnalyzer } from "./codeintel/dart/index.js"; 28 | import { createJavaAnalyzer } from "./codeintel/java/index.js"; 29 | import { createPHPAnalyzer } from "./codeintel/php/index.js"; 30 | import { LanguageRegistry } from "./codeintel/registry.js"; 31 | import { createRustAnalyzer } from "./codeintel/rust/index.js"; 32 | import { createSwiftAnalyzer } from "./codeintel/swift/index.js"; 33 | import type { AnalysisResult } from "./codeintel/types.js"; 34 | import { createTypeScriptAnalyzer } from "./codeintel/typescript/index.js"; 35 | 36 | // シングルトンレジストリを初期化 37 | const registry = LanguageRegistry.getInstance(); 38 | 39 | // 全言語アナライザーを登録 40 | registry.register(createTypeScriptAnalyzer()); 41 | registry.register(createSwiftAnalyzer()); 42 | registry.register(createPHPAnalyzer()); 43 | registry.register(createJavaAnalyzer()); 44 | registry.register(createDartAnalyzer()); 45 | registry.register(createRustAnalyzer()); 46 | 47 | /** 48 | * ソースコードを解析してシンボル、スニペット、依存関係を抽出 49 | * 50 | * @param pathInRepo - リポジトリ内のファイルパス 51 | * @param lang - 言語名 (TypeScript, Swift, PHP, Java, Dart など) 52 | * @param content - ファイルコンテンツ 53 | * @param fileSet - リポジトリ内の全ファイルパスセット (依存関係解決用) 54 | * @param workspaceRoot - ワークスペースルート (Dart 解析で必須) 55 | * @returns シンボル、スニペット、依存関係を含む解析結果 56 | */ 57 | export async function analyzeSource( 58 | pathInRepo: string, 59 | lang: string | null, 60 | content: string, 61 | fileSet: Set, 62 | workspaceRoot?: string 63 | ): Promise { 64 | const normalizedLang = lang ?? 
""; 65 | 66 | // サポート対象言語かチェック 67 | if (!registry.isSupported(normalizedLang)) { 68 | return { symbols: [], snippets: [], dependencies: [] }; 69 | } 70 | 71 | // レジストリ経由で解析を実行 72 | // exactOptionalPropertyTypes対応: workspaceRootはundefinedの場合は省略 73 | return await registry.analyze(normalizedLang, { 74 | pathInRepo, 75 | content, 76 | fileSet, 77 | ...(workspaceRoot !== undefined && { workspaceRoot }), 78 | }); 79 | } 80 | 81 | /** 82 | * 全言語アナライザーのリソースをクリーンアップ 83 | * 84 | * プロセス終了前に呼び出すことで、 85 | * Dart Analysis Server などの外部プロセスを適切に終了 86 | */ 87 | export async function cleanup(): Promise { 88 | await registry.cleanup(); 89 | } 90 | -------------------------------------------------------------------------------- /tests/eval/goldens/queries-core4.yaml: -------------------------------------------------------------------------------- 1 | schemaVersion: "1.0.0" 2 | datasetVersion: "v2025-11-docs-plain" 3 | description: "Core low-score queries subset" 4 | defaultParams: 5 | k: 10 6 | tool: "context_bundle" 7 | boostProfile: "default" 8 | timeoutMs: 30000 9 | 10 | defaultRepo: "vscode" 11 | repos: 12 | vscode: 13 | repoPath: "external/vscode" 14 | dbPath: "external/vscode/.kiri/index.duckdb" 15 | assay-kit: 16 | repoPath: "external/assay-kit" 17 | dbPath: "external/assay-kit/.kiri/index.duckdb" 18 | 19 | queries: 20 | - id: vscode-microtask-debug 21 | query: "microtaskQueue tracer schedule vs/base" 22 | tool: "context_bundle" 23 | intent: "Locate microtask queue tracer scheduling logic" 24 | category: "debug" 25 | repo: "vscode" 26 | params: 27 | boostProfile: "vscode" 28 | expected: 29 | paths: 30 | - "src/vs/platform/telemetry/common/telemetryUtils.ts" 31 | hints: 32 | - "telemetryUtils.ts" 33 | - "microtask" 34 | - "tracer" 35 | 36 | - id: vscode-task-terminal 37 | query: "TaskTerminalShellIntegration registerRunProfiles" 38 | tool: "context_bundle" 39 | intent: "Find where task terminal shell integration registers run profiles" 40 | category: "feature" 41 | repo: "vscode" 42 | params: 43 | boostProfile: "vscode" 44 | expected: 45 | paths: 46 | - "src/vs/workbench/contrib/tasks/browser/terminalTaskSystem.ts" 47 | hints: 48 | - "terminalTaskSystem.ts" 49 | - "registerRunProfiles" 50 | 51 | - id: vscode-tree-data-provider 52 | query: "TreeDataProvider registerTreeDataProvider" 53 | tool: "context_bundle" 54 | intent: "Locate TreeDataProvider API implementation" 55 | category: "api" 56 | repo: "vscode" 57 | params: 58 | boostProfile: "vscode" 59 | expected: 60 | paths: 61 | - "src/vs/workbench/api/common/extHostTreeViews.ts" 62 | hints: 63 | - "extHostTreeViews.ts" 64 | - "registerTreeDataProvider" 65 | 66 | - id: assay-dataset-loader 67 | query: "loadDatasetFromYAML schema validation error" 68 | tool: "context_bundle" 69 | intent: "Find dataset loader and schema definitions" 70 | category: "assay" 71 | repo: "assay-kit" 72 | params: 73 | boostProfile: "default" 74 | expected: 75 | paths: 76 | - "packages/assay-kit/src/dataset/loader.ts" 77 | - "packages/assay-kit/src/dataset/schemas/dataset.schema.ts" 78 | - "packages/assay-kit/src/dataset/schemas/query.schema.ts" 79 | hints: 80 | - "packages/assay-kit/src/dataset/loader.ts" 81 | - "packages/assay-kit/src/dataset/schemas/dataset.schema.ts" 82 | - "packages/assay-kit/src/dataset/schemas/query.schema.ts" 83 | - "loadDatasetFromYAML" 84 | - "schema" 85 | - "validation" 86 | - "dataset" 87 | - "loader" 88 | -------------------------------------------------------------------------------- /src/indexer/dart/pathKey.ts: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Path normalization utilities for Windows case-insensitive filesystem support 3 | * 4 | * Fix #3 & #5: Normalize paths on Windows to prevent Map key collisions 5 | * when same physical path is referenced with different casing (C:\repo vs c:\repo) 6 | */ 7 | 8 | import { realpathSync } from "node:fs"; 9 | import path from "node:path"; 10 | 11 | /** 12 | * Normalize workspace root path for use as Map key 13 | * 14 | * Windows: Resolves to real path, converts to forward slashes, lowercases 15 | * Unix: Returns normalized absolute path 16 | * 17 | * @param workspaceRoot - Workspace root path (may be relative) 18 | * @returns Normalized key suitable for Map/Set operations 19 | */ 20 | export function normalizeWorkspaceKey(workspaceRoot: string): string { 21 | // Normalize to absolute path first 22 | const normalized = path.resolve(workspaceRoot); 23 | 24 | if (process.platform === "win32") { 25 | try { 26 | // Resolve symlinks/junctions to real path 27 | const realPath = realpathSync.native(normalized); 28 | // Convert backslashes to forward slashes and lowercase 29 | return realPath.replace(/\\/g, "/").toLowerCase(); 30 | } catch { 31 | // If path doesn't exist yet, fall back to normalized path 32 | return normalized.replace(/\\/g, "/").toLowerCase(); 33 | } 34 | } 35 | 36 | return normalized; 37 | } 38 | 39 | /** 40 | * Normalize file path for use as Map key 41 | * 42 | * Windows: Normalizes path, converts to forward slashes, lowercases 43 | * Unix: Returns normalized absolute path 44 | * 45 | * Note: Does NOT resolve symlinks for files (unlike workspace roots) 46 | * because overlay files may not exist on disk yet. 47 | * 48 | * @param filePath - File path (absolute or relative) 49 | * @param workspaceRoot - Optional workspace root for resolving relative paths 50 | * @returns Normalized key suitable for Map/Set operations 51 | */ 52 | export function normalizeFileKey(filePath: string, workspaceRoot?: string): string { 53 | // Resolve to absolute path 54 | const normalized = workspaceRoot ? 
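Illustrative inputs for the key normalizers in this file (paths hypothetical). On `win32`, casing and separator differences collapse to one Map key, which is exactly the `C:\repo` vs `c:\repo` collision described in the header comment:

```typescript
normalizeWorkspaceKey("C:\\Repo\\App");  // "c:/repo/app" on Windows (realpath, /, lowercase)
normalizeWorkspaceKey("c:\\repo\\app");  // "c:/repo/app" on Windows — same key as above
normalizeWorkspaceKey("/home/user/app"); // "/home/user/app" on Unix (absolute path, unchanged)
```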
path.resolve(workspaceRoot, filePath) : path.resolve(filePath); 55 | 56 | if (process.platform === "win32") { 57 | // Normalize and lowercase for case-insensitive comparison 58 | return path.normalize(normalized).replace(/\\/g, "/").toLowerCase(); 59 | } 60 | 61 | return normalized; 62 | } 63 | 64 | /** 65 | * Normalize any path for consistent comparison 66 | * 67 | * Generic version for edge cases where workspace vs file distinction doesn't matter 68 | * 69 | * @param inputPath - Any file system path 70 | * @returns Normalized path 71 | */ 72 | export function normalizePath(inputPath: string): string { 73 | const normalized = path.normalize(inputPath); 74 | 75 | if (process.platform === "win32") { 76 | return normalized.replace(/\\/g, "/").toLowerCase(); 77 | } 78 | 79 | return normalized; 80 | } 81 | -------------------------------------------------------------------------------- /tests/eval/goldens/baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.10.0", 3 | "gitSha": "9e59843", 4 | "date": "2025-11-17T01:56:59.937Z", 5 | "datasetVersion": "v2025-11-docs-plain", 6 | "description": "Baseline measurement after metadata hint vs docmeta filter split (docs vs docs-plain comparison).", 7 | "thresholds": { 8 | "minP10": 0.7, 9 | "minR5": 0.5, 10 | "maxTTFU": 1000, 11 | "minMetadataPassRate": 1, 12 | "minInboundPassRate": 1, 13 | "description": "Target metrics from docs/overview.md: P@10 ≥ 0.7, TTFU ≤ 1.0s" 14 | }, 15 | "overall": { 16 | "precisionAtK": 0.2863636364, 17 | "avgTTFU": 1.1470588235, 18 | "totalQueries": 22, 19 | "successfulQueries": 17, 20 | "failedQueries": 5, 21 | "avgTokensEstimate": 42461.7273, 22 | "avgBaselineTokens": 457694.1176, 23 | "avgTokenSavingsRatio": 0.8951621479, 24 | "avgHintCoverage": 0.1411764706, 25 | "metadataPassRate": 0.5, 26 | "inboundPassRate": 0.5 27 | }, 28 | "byCategory": { 29 | "editor": { 30 | "precisionAtK": 0.2, 31 | "avgTTFU": 0, 32 | "count": 2, 33 | "avgTokenSavingsRatio": 0.9809868942, 34 | "avgHintCoverage": 0.3, 35 | "metadataPassRate": null, 36 | "inboundPassRate": null 37 | }, 38 | "api": { 39 | "precisionAtK": 0.1333333333, 40 | "avgTTFU": 0, 41 | "count": 3, 42 | "avgTokenSavingsRatio": 0.8790261687, 43 | "avgHintCoverage": 0.1666666667, 44 | "metadataPassRate": null, 45 | "inboundPassRate": null 46 | }, 47 | "debug": { 48 | "precisionAtK": 0.15, 49 | "avgTTFU": 0, 50 | "count": 2, 51 | "avgTokenSavingsRatio": 0.8104621471, 52 | "avgHintCoverage": 0.15, 53 | "metadataPassRate": null, 54 | "inboundPassRate": null 55 | }, 56 | "feature": { 57 | "precisionAtK": 0.125, 58 | "avgTTFU": 0, 59 | "count": 4, 60 | "avgTokenSavingsRatio": 0.9860883952, 61 | "avgHintCoverage": 0.175, 62 | "metadataPassRate": null, 63 | "inboundPassRate": null 64 | }, 65 | "infra": { 66 | "precisionAtK": 0.2, 67 | "avgTTFU": 0, 68 | "count": 1, 69 | "avgTokenSavingsRatio": 0.6274278364, 70 | "avgHintCoverage": 0.3, 71 | "metadataPassRate": null, 72 | "inboundPassRate": null 73 | }, 74 | "docs": { 75 | "precisionAtK": 0.9, 76 | "avgTTFU": 3.9, 77 | "count": 5, 78 | "avgTokenSavingsRatio": 0.8851997017, 79 | "avgHintCoverage": 0, 80 | "metadataPassRate": 1, 81 | "inboundPassRate": 1 82 | }, 83 | "docs-plain": { 84 | "precisionAtK": 0, 85 | "avgTTFU": 0, 86 | "count": 5, 87 | "avgTokenSavingsRatio": null, 88 | "avgHintCoverage": null, 89 | "metadataPassRate": 0, 90 | "inboundPassRate": 0 91 | } 92 | }, 93 | "notes": "Metrics captured from pnpm run eval:golden on 2025-11-17 after metadata hint vs 
docmeta filter changes." 94 | } 95 | -------------------------------------------------------------------------------- /docs/doc_index.yaml: -------------------------------------------------------------------------------- 1 | # KIRI Document Index 2 | # shirushi によって管理されるドキュメントID一覧 3 | # 形式: {KIND}-{SER3} (例: ARCH-001) 4 | 5 | documents: 6 | # Architecture - コア設計とリファレンス 7 | - doc_id: ARCH-001 8 | path: docs/overview.md 9 | title: "KIRI 概要" 10 | - doc_id: ARCH-002 11 | path: docs/data-model.md 12 | title: "データモデル(DuckDB スキーマ)" 13 | - doc_id: ARCH-003 14 | path: docs/indexer.md 15 | title: "Indexer" 16 | - doc_id: ARCH-004 17 | path: docs/api-and-client.md 18 | title: "API & Client" 19 | - doc_id: ARCH-005 20 | path: docs/search-ranking.md 21 | title: "検索とランキング" 22 | - doc_id: ARCH-006 23 | path: docs/schema-migrations.md 24 | title: "Schema Migrations" 25 | - doc_id: ARCH-007 26 | path: docs/dynamic-profile-selection.md 27 | title: "Dynamic Profile Selection" 28 | - doc_id: ARCH-008 29 | path: docs/dcca/detailed-design-ja.md 30 | title: "詳細設計書" 31 | - doc_id: ARCH-009 32 | path: docs/dcca/semantic-graph.md 33 | title: "KIRI Semantic Graph" 34 | 35 | # ADR - Architecture Decision Records 36 | - doc_id: ADR-001 37 | path: docs/adr/ADR-001-graduated-penalty-system.md 38 | title: "Graduated Penalty System" 39 | - doc_id: ADR-002 40 | path: docs/adr/ADR-002-abbreviation-expansion-for-path-matching.md 41 | title: "Abbreviation Expansion for Path Matching" 42 | 43 | # Runbook - 運用手順 44 | - doc_id: RUN-001 45 | path: docs/runbook.md 46 | title: "運用 Runbook" 47 | - doc_id: RUN-002 48 | path: docs/operations.md 49 | title: "Operations" 50 | 51 | # Guide - ユーザー/開発者向けガイド 52 | - doc_id: GUIDE-001 53 | path: docs/documentation-best-practices.md 54 | title: "Documentation Best Practices" 55 | - doc_id: GUIDE-002 56 | path: docs/user/path-penalties.md 57 | title: "Path Penalties User Guide" 58 | - doc_id: GUIDE-003 59 | path: docs/user/path-penalties.ja.md 60 | title: "Path Penalties ユーザーガイド(日本語)" 61 | - doc_id: GUIDE-004 62 | path: docs/dev/path-penalties.md 63 | title: "Path Penalties Developer Guide" 64 | - doc_id: GUIDE-005 65 | path: docs/dev/node-version.md 66 | title: "Node Version Requirements" 67 | - doc_id: GUIDE-006 68 | path: docs/setup.md 69 | title: "Setup Guide" 70 | - doc_id: GUIDE-007 71 | path: docs/tools-reference.md 72 | title: "MCP Tools Reference" 73 | - doc_id: GUIDE-008 74 | path: docs/configuration.md 75 | title: "Configuration Guide" 76 | 77 | # Security - セキュリティドキュメント 78 | - doc_id: SEC-001 79 | path: docs/security.md 80 | title: "セキュリティとコンプライアンス" 81 | - doc_id: SEC-002 82 | path: docs/processes/security-review.md 83 | title: "Security Review Process" 84 | 85 | # Testing - テスト戦略 86 | - doc_id: TEST-001 87 | path: docs/testing.md 88 | title: "テストと評価" 89 | 90 | # Planning - ロードマップと計画 91 | - doc_id: PLAN-001 92 | path: docs/roadmap.md 93 | title: "Roadmap" 94 | - doc_id: PLAN-002 95 | path: docs/principles.md 96 | title: "開発原則と未解決課題" 97 | -------------------------------------------------------------------------------- /docs/overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | doc_id: "ARCH-001" 3 | title: "KIRI 概要" 4 | category: "architecture" 5 | tags: 6 | - overview 7 | - architecture 8 | - docs 9 | - llm 10 | service: "kiri" 11 | --- 12 | 13 | # KIRI 概要 14 | 15 | ## 基本情報 16 | 17 | - **バージョン**: v0.1 (Draft) 18 | - **更新日**: 2025-10-28 19 | - **オーナー**: りずさん 20 | - **目的**: Git ワークツリーの構造・履歴・近接・意味を合成し、LLM 
向けに最小限の文脈断片を即時返却するプラットフォームを構築する。 21 | - **推奨 Node バージョン**: 20.x (LTS) 22 | 23 | ## 用語 24 | 25 | - **Indexer**: Git ワークツリーを走査して DuckDB に書き込む取り込みプロセス。 26 | - **MCP Server**: KIRI の検索機能を MCP(JSON-RPC over stdio/HTTP)ツールとして提供するサーバー。 27 | - **Client**: Codex CLI など MCP ツールを呼び出すクライアント。 28 | - **断片(Snippet)**: 関数やクラスなど意味境界に沿って抽出した行範囲。 29 | 30 | ## 関連ドキュメント 31 | 32 | - [運用 Runbook](./runbook.md#運用-runbook): デグレード時の復旧と観測手順。Front matter の `tags: [operations, degrade, observability]` を使用。 33 | - [検索とランキング指針](./search-ranking.md#検索とランキング): `boost_profile` やメタデータブーストの詳細。 34 | - [テストと評価](./testing.md#テストと評価): ゴールデンセットやカバレッジ指標の延長戦ガイド。 35 | 36 | ## メトリクス/観測 37 | 38 | - Prometheus exporter は `src/server/observability/metrics.ts` で登録。 39 | - Adaptive K 関連: `adaptive_k_selected_total{enabled,category,k}`(選択Kの分布)、 40 | `adaptive_k_deviation_total{category,requested}`(ユーザ指定limitがallowedSet外のとき)。 41 | 42 | ## 目標と非目標 43 | 44 | ### 目標 45 | 46 | - **P@10 ≥ 0.7**: 上位 10 断片中 7 つ以上が実務で有用。 47 | - **TTFU ≤ 1.0s**: ローカル実行で初回有用断片を 1 秒以内に返却。 48 | - **Token 削減 ≥ 40%**: 従来の貼り付け方式と比較してプロンプトトークンを 40%以上削減。 49 | - **Degrade 運転**: FTS/VSS 拡張なしでも文字列+構造近接検索で稼働。 50 | 51 | ### 非目標 52 | 53 | - IDE の完全代替や自動リファクタリング等の包括的コードインサイト。 54 | - リポジトリ横断の厳密なコールグラフ生成(近似で十分)。 55 | 56 | ## 全体アーキテクチャ 57 | 58 | ``` 59 | +--------------------+ +-----------------------------+ +--------------------+ 60 | | MCP Client |<--->| KIRI MCP Server (JSON-RPC) |<--->| DuckDB | 61 | | (Codex CLI, etc.) | | tools: search/bundle/... | | index.duckdb | 62 | +--------------------+ +-----------------------------+ +--------------------+ 63 | ^ 64 | | 65 | +---------+----------+ 66 | | Indexer | 67 | | git scan / AST | 68 | | embedding (opt) | 69 | +-------------------+ 70 | ``` 71 | 72 | - **Indexer** が Git から構造・履歴・本文・埋め込みを DuckDB に書き込む。 73 | - **MCP Server** が DuckDB を叩き、`files_search` や `context_bundle` などのツールを公開する。 74 | - **Client** は `context_bundle` で得た断片を LLM プロンプトへ注入する。 75 | 76 | ## ドメイン用語辞書 77 | 78 | - 定義ファイル: `config/domain-terms.yml`(`.kiri/domain-terms.yml` でも可)。camelCase/スペース/アンダースコアは正規化され、ハイフン小文字+分割トークンも生成。 79 | - スキーマ: カテゴリ配下に `{canonical: {aliases, files}}` の配列を並べる。例は `config/domain-terms.yml` を参照。 80 | - フィーチャーフラグ: デフォルト OFF。`KIRI_ENABLE_DOMAIN_TERMS=1` を付けて `context_bundle` サーバーを起動すると有効化され、辞書エイリアスとファイルヒントをブースト対象に追加する(`dictionary:hint:` が `why` に付与)。 81 | - 更新手順: 辞書を編集したら `pnpm exec vitest run tests/server/domain-terms.spec.ts` で構文を確認し、必要に応じて `tests/server/context.bundle.spec.ts` も併せて実行。 82 | -------------------------------------------------------------------------------- /scripts/diag/build-hint-dictionary.ts: -------------------------------------------------------------------------------- 1 | import { resolve } from "node:path"; 2 | import process from "node:process"; 3 | 4 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 5 | import { normalizeRepoPath } from "../../src/shared/utils/path.js"; 6 | 7 | interface BuildArgs { 8 | databasePath: string; 9 | repoRoot: string; 10 | minFreq: number; 11 | } 12 | 13 | function parseArgs(argv: string[]): BuildArgs { 14 | let databasePath: string | undefined; 15 | let repoRoot: string | undefined; 16 | let minFreq = 1; 17 | for (let i = 0; i < argv.length; i += 1) { 18 | const arg = argv[i]; 19 | if (arg === "--db") { 20 | databasePath = argv[++i]; 21 | } else if (arg === "--repo") { 22 | repoRoot = argv[++i]; 23 | } else if (arg === "--min-freq") { 24 | const value = Number.parseInt(argv[++i] ?? 
"", 10); 25 | if (Number.isFinite(value) && value >= 1) { 26 | minFreq = value; 27 | } 28 | } 29 | } 30 | if (!databasePath || !repoRoot) { 31 | throw new Error("build-hint-dictionary requires --db and --repo "); 32 | } 33 | return { databasePath, repoRoot, minFreq }; 34 | } 35 | 36 | export async function main(argv = process.argv.slice(2)): Promise { 37 | const args = parseArgs(argv); 38 | const normalizedRepoRoot = normalizeRepoPath(resolve(args.repoRoot)); 39 | const db = await DuckDBClient.connect({ 40 | databasePath: args.databasePath, 41 | ensureDirectory: false, 42 | }); 43 | try { 44 | const repoRows = await db.all<{ id: number }>( 45 | `SELECT id FROM repo WHERE root = ? OR normalized_root = ? LIMIT 1`, 46 | [normalizedRepoRoot, normalizedRepoRoot] 47 | ); 48 | if (repoRows.length === 0) { 49 | throw new Error(`Repository not found for root: ${normalizedRepoRoot}`); 50 | } 51 | const repoId = repoRows[0]!.id; 52 | 53 | await db.run(`DELETE FROM hint_dictionary WHERE repo_id = ?`, [repoId]); 54 | 55 | await db.run( 56 | ` 57 | INSERT INTO hint_dictionary (repo_id, hint_value, target_path, freq, updated_at) 58 | SELECT ?, hint_value, target_path, cnt, CURRENT_TIMESTAMP 59 | FROM ( 60 | SELECT hint_value, 61 | target_path, 62 | COUNT(*) AS cnt 63 | FROM hint_expansion 64 | WHERE repo_id = ? 65 | AND target_path IS NOT NULL 66 | GROUP BY hint_value, target_path 67 | ) 68 | WHERE cnt >= ? 69 | `, 70 | [repoId, repoId, args.minFreq] 71 | ); 72 | 73 | console.info( 74 | `Hint dictionary rebuilt for repo_id=${repoId} (root=${normalizedRepoRoot}) with min_freq=${args.minFreq}.` 75 | ); 76 | } finally { 77 | await db.close(); 78 | } 79 | } 80 | 81 | const executedDirectly = 82 | typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1]; 83 | 84 | if (executedDirectly) { 85 | main().catch((error) => { 86 | console.error("Failed to build hint dictionary:", error); 87 | process.exitCode = 1; 88 | }); 89 | } 90 | -------------------------------------------------------------------------------- /tests/shared/cli/testHelpers.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Test helpers for CLI testing 3 | */ 4 | 5 | import { execa } from "execa"; 6 | import { describe, it, expect } from "vitest"; 7 | 8 | /** 9 | * Test suite configuration for CLI testing 10 | */ 11 | export interface CliTestConfig { 12 | /** Path to the compiled CLI entry point */ 13 | cliPath: string; 14 | /** Command name (e.g., "kiri", "kiri-server") */ 15 | commandName: string; 16 | /** Command description for help text */ 17 | description: string; 18 | /** Expected sections in help output */ 19 | expectedSections?: string[]; 20 | } 21 | 22 | /** 23 | * Create standardized CLI test suite 24 | * 25 | * @param config - CLI test configuration 26 | * @returns Test suite functions 27 | */ 28 | export function createCliTests(config: CliTestConfig) { 29 | const { cliPath, commandName, description, expectedSections = [] } = config; 30 | 31 | return { 32 | /** 33 | * Test --help flag functionality 34 | */ 35 | testHelp: () => { 36 | it("should display help message with --help flag", async () => { 37 | const { stdout, exitCode } = await execa("node", [cliPath, "--help"]); 38 | 39 | expect(exitCode).toBe(0); 40 | expect(stdout).toContain(description); 41 | expect(stdout).toContain(`Usage: ${commandName} [options]`); 42 | expect(stdout).toContain("Common:"); 43 | expect(stdout).toContain("--help"); 44 | expect(stdout).toContain("--version"); 45 | 
expect(stdout).toContain("Examples:"); 46 | 47 | // Check expected sections if provided 48 | for (const section of expectedSections) { 49 | expect(stdout).toContain(section); 50 | } 51 | }); 52 | 53 | it("should display help message with -h short flag", async () => { 54 | const { stdout, exitCode } = await execa("node", [cliPath, "-h"]); 55 | 56 | expect(exitCode).toBe(0); 57 | expect(stdout).toContain(description); 58 | expect(stdout).toContain(`Usage: ${commandName} [options]`); 59 | }); 60 | }, 61 | 62 | /** 63 | * Test --version flag functionality 64 | */ 65 | testVersion: () => { 66 | it("should display version with --version flag", async () => { 67 | const { stdout, exitCode } = await execa("node", [cliPath, "--version"]); 68 | 69 | expect(exitCode).toBe(0); 70 | expect(stdout).toMatch(new RegExp(`^${commandName} v\\d+\\.\\d+\\.\\d+$`)); 71 | }); 72 | 73 | it("should display version with -v short flag", async () => { 74 | const { stdout, exitCode } = await execa("node", [cliPath, "-v"]); 75 | 76 | expect(exitCode).toBe(0); 77 | expect(stdout).toMatch(new RegExp(`^${commandName} v\\d+\\.\\d+\\.\\d+$`)); 78 | }); 79 | }, 80 | 81 | /** 82 | * Run all standard CLI tests 83 | */ 84 | runAll: () => { 85 | describe(`${commandName} CLI`, () => { 86 | createCliTests(config).testHelp(); 87 | createCliTests(config).testVersion(); 88 | }); 89 | }, 90 | }; 91 | } 92 | -------------------------------------------------------------------------------- /src/indexer/codeintel/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Language analyzer core types 3 | * 4 | * このファイルは言語アナライザーシステムの中核となる型定義を提供します。 5 | * Alloyで検証済みのPlan A (Central Registry) 設計に基づいています。 6 | */ 7 | 8 | /** 9 | * シンボル情報を表すレコード 10 | * クラス、関数、メソッドなどのコード要素を表現します 11 | */ 12 | export interface SymbolRecord { 13 | /** 1から始まる連番ID */ 14 | symbolId: number; 15 | /** シンボル名 */ 16 | name: string; 17 | /** シンボルの種類 (function, class, method, interface, enum等) */ 18 | kind: string; 19 | /** 開始行番号 (1-based) */ 20 | rangeStartLine: number; 21 | /** 終了行番号 (1-based) */ 22 | rangeEndLine: number; 23 | /** シグネチャ (最大200文字) */ 24 | signature: string | null; 25 | /** ドキュメントコメント */ 26 | doc: string | null; 27 | } 28 | 29 | /** 30 | * スニペット情報を表すレコード 31 | * コードの行範囲を表現します 32 | */ 33 | export interface SnippetRecord { 34 | /** 開始行番号 (1-based) */ 35 | startLine: number; 36 | /** 終了行番号 (1-based) */ 37 | endLine: number; 38 | /** 関連するシンボルID (nullの場合はファイル全体) */ 39 | symbolId: number | null; 40 | } 41 | 42 | /** 43 | * 依存関係を表すレコード 44 | * import/requireなどの依存関係を表現します 45 | */ 46 | export interface DependencyRecord { 47 | /** 依存先の種類 */ 48 | dstKind: "path" | "package"; 49 | /** 依存先のパスまたはパッケージ名 */ 50 | dst: string; 51 | /** 関係の種類 (import, require等) */ 52 | rel: string; 53 | } 54 | 55 | /** 56 | * 解析コンテキスト 57 | * アナライザーに渡される入力情報 58 | */ 59 | export interface AnalysisContext { 60 | /** リポジトリルートからの相対パス */ 61 | pathInRepo: string; 62 | /** ファイルの内容 */ 63 | content: string; 64 | /** インデックス済みファイルのセット (相対パス解決用) */ 65 | fileSet: Set; 66 | /** ワークスペースルート (LSPベースのアナライザー用、絶対パス) */ 67 | workspaceRoot?: string; 68 | } 69 | 70 | /** 71 | * 解析結果 72 | * アナライザーが返す出力情報 73 | */ 74 | export interface AnalysisResult { 75 | /** 抽出されたシンボル一覧 */ 76 | symbols: SymbolRecord[]; 77 | /** 生成されたスニペット一覧 */ 78 | snippets: SnippetRecord[]; 79 | /** 検出された依存関係一覧 */ 80 | dependencies: DependencyRecord[]; 81 | /** 解析ステータス (エラー時に設定) */ 82 | status?: "success" | "error" | "sdk_unavailable"; 83 | /** エラーメッセージ (エラー時に設定) */ 84 | error?: string; 85 | } 86 | 87 | /** 
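For orientation, a concrete `AnalysisResult` matching the record types above — a file exporting one function with a single `node:fs` import (all values illustrative):

```typescript
const example: AnalysisResult = {
  symbols: [
    {
      symbolId: 1,
      name: "main",
      kind: "function",
      rangeStartLine: 1,
      rangeEndLine: 10,
      signature: "main(): void",
      doc: null,
    },
  ],
  snippets: [{ startLine: 1, endLine: 10, symbolId: 1 }],
  dependencies: [{ dstKind: "package", dst: "node:fs", rel: "import" }],
};
```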
88 | * 言語アナライザーインターフェース 89 | * 90 | * 実装要件: 91 | * 1. ステートレスまたは内部で状態を管理 (スレッドセーフ) 92 | * 2. パースエラー時は空の結果を返す (例外をスローしない) 93 | * 3. 並行解析リクエストをサポート 94 | * 95 | * Alloyモデルの `Analyzer` atomに対応: 96 | * - language: 言語識別子 97 | * - analyze: 解析メソッド 98 | * - dispose: リソース解放メソッド (オプション) 99 | */ 100 | export interface LanguageAnalyzer { 101 | /** 言語識別子 (例: "TypeScript", "Swift") */ 102 | readonly language: string; 103 | 104 | /** 105 | * ソースコードを解析してシンボル、スニペット、依存関係を抽出 106 | * 107 | * @param context - 解析コンテキスト 108 | * @returns 解析結果 (エラー時も例外をスローせず空の結果を返す) 109 | */ 110 | analyze(context: AnalysisContext): Promise; 111 | 112 | /** 113 | * アナライザーが保持するリソースを解放 114 | * レジストリからの削除時に呼び出される 115 | */ 116 | dispose?(): Promise; 117 | } 118 | 119 | /** 120 | * 空の解析結果を生成するヘルパー 121 | */ 122 | export function emptyResult(): AnalysisResult { 123 | return { symbols: [], snippets: [], dependencies: [] }; 124 | } 125 | -------------------------------------------------------------------------------- /src/shared/embedding.ts: -------------------------------------------------------------------------------- 1 | import { createHash } from "node:crypto"; 2 | 3 | import { tokenizeText } from "./tokenizer.js"; 4 | 5 | export interface EmbeddingVector { 6 | dims: number; 7 | values: number[]; 8 | } 9 | 10 | const DEFAULT_DIMS = 64; 11 | 12 | function hashToken(token: string): number { 13 | const digest = createHash("sha256").update(token).digest(); 14 | // Use the first four bytes to build a deterministic integer hash 15 | const byte0 = digest[0] ?? 0; 16 | const byte1 = digest[1] ?? 0; 17 | const byte2 = digest[2] ?? 0; 18 | const byte3 = digest[3] ?? 0; 19 | return ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3) >>> 0; 20 | } 21 | 22 | function applyToken(vector: number[], token: string): void { 23 | const hash = hashToken(token); 24 | const index = hash % vector.length; 25 | const sign = (hash & 1) === 0 ? 1 : -1; 26 | const weight = Math.log(1 + token.length); 27 | const current = vector[index]; 28 | if (current !== undefined) { 29 | vector[index] = current + sign * weight; 30 | } 31 | } 32 | 33 | function normalize(values: number[]): number[] { 34 | const norm = Math.sqrt(values.reduce((sum, value) => sum + value * value, 0)); 35 | if (!Number.isFinite(norm) || norm === 0) { 36 | return values.map(() => 0); 37 | } 38 | return values.map((value) => value / norm); 39 | } 40 | 41 | export function generateEmbedding(text: string, dims = DEFAULT_DIMS): EmbeddingVector | null { 42 | if (!text || text.trim().length === 0) { 43 | return null; 44 | } 45 | const tokens = tokenizeText(text); 46 | if (tokens.length === 0) { 47 | return null; 48 | } 49 | const vector = new Array(dims).fill(0); 50 | for (const token of tokens) { 51 | applyToken(vector, token); 52 | } 53 | return { dims, values: normalize(vector) }; 54 | } 55 | 56 | /** 57 | * Calculate structural similarity between two embedding vectors using cosine similarity. 58 | * Note: This measures syntactic/structural similarity based on LSH (Locality-Sensitive Hashing), 59 | * not semantic similarity from language models like BERT or GPT embeddings. 
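A usage sketch for the hashing-based embeddings in this module; the snippets compared are hypothetical:

```typescript
const a = generateEmbedding("function parseConfig(path: string) {}");
const b = generateEmbedding("function loadConfig(file: string) {}");
if (a && b) {
  // Cosine over LSH token vectors: higher means more structurally similar,
  // with no claim about semantic similarity (see the note above).
  const score = structuralSimilarity(a.values, b.values);
}
```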
60 | * 61 | * @param a - First embedding vector 62 | * @param b - Second embedding vector 63 | * @returns Similarity score between 0 and 1 64 | */ 65 | export function structuralSimilarity(a: number[], b: number[]): number { 66 | const length = Math.min(a.length, b.length); 67 | if (length === 0) { 68 | return 0; 69 | } 70 | let dot = 0; 71 | let normA = 0; 72 | let normB = 0; 73 | for (let i = 0; i < length; i += 1) { 74 | const valueA = a[i]; 75 | const valueB = b[i]; 76 | if (valueA === undefined || valueB === undefined) { 77 | continue; 78 | } 79 | dot += valueA * valueB; 80 | normA += valueA * valueA; 81 | normB += valueB * valueB; 82 | } 83 | if (normA === 0 || normB === 0) { 84 | return 0; 85 | } 86 | return dot / Math.sqrt(normA * normB); 87 | } 88 | 89 | /** @deprecated Use structuralSimilarity() instead. Kept for backward compatibility. */ 90 | export const cosineSimilarity = structuralSimilarity; 91 | 92 | export const EMBEDDING_DIMS = DEFAULT_DIMS; 93 | -------------------------------------------------------------------------------- /tests/integration/security.lock.integration.spec.ts: -------------------------------------------------------------------------------- 1 | import { access, mkdtemp, readFile, rm } from "node:fs/promises"; 2 | import { tmpdir } from "node:os"; 3 | import { join } from "node:path"; 4 | 5 | import { afterEach, describe, expect, it } from "vitest"; 6 | 7 | import { main as cliMain } from "../../src/client/cli.js"; 8 | import { runIndexer } from "../../src/indexer/cli.js"; 9 | import { createServerRuntime } from "../../src/server/runtime.js"; 10 | import { loadSecurityConfig } from "../../src/shared/security/config.js"; 11 | import { createTempRepo } from "../helpers/test-repo.js"; 12 | 13 | describe("security lock integration", () => { 14 | const cleanupCallbacks: Array<() => Promise> = []; 15 | const expectedHash = loadSecurityConfig().hash; 16 | 17 | afterEach(async () => { 18 | while (cleanupCallbacks.length > 0) { 19 | const cleanup = cleanupCallbacks.pop(); 20 | if (cleanup) { 21 | await cleanup(); 22 | } 23 | } 24 | }); 25 | 26 | it("allows runtime startup with lock created alongside database", async () => { 27 | const repo = await createTempRepo({ "README.md": "# sample\n" }); 28 | cleanupCallbacks.push(repo.cleanup); 29 | 30 | const dbDir = await mkdtemp(join(tmpdir(), "kiri-integration-db-")); 31 | cleanupCallbacks.push(async () => { 32 | await rm(dbDir, { recursive: true, force: true }); 33 | }); 34 | 35 | const dbPath = join(dbDir, "index.duckdb"); 36 | const lockPath = join(dbDir, "security.lock"); 37 | 38 | await runIndexer({ repoRoot: repo.path, databasePath: dbPath, full: true }); 39 | 40 | const exitCode = cliMain(["security", "verify", "--db", dbPath, "--write-lock"]); 41 | expect(exitCode).toBe(0); 42 | 43 | await access(lockPath); 44 | 45 | const runtime = await createServerRuntime({ repoRoot: repo.path, databasePath: dbPath }); 46 | cleanupCallbacks.push(async () => { 47 | await runtime.close(); 48 | }); 49 | 50 | const storedHash = (await readFile(lockPath, "utf-8")).trim(); 51 | expect(storedHash).toBe(expectedHash); 52 | }); 53 | 54 | it("creates missing lock when allowWriteLock is true", async () => { 55 | const repo = await createTempRepo({ "src/app.ts": "export const app = () => 1;\n" }); 56 | cleanupCallbacks.push(repo.cleanup); 57 | 58 | const dbDir = await mkdtemp(join(tmpdir(), "kiri-integration-db-create-")); 59 | cleanupCallbacks.push(async () => { 60 | await rm(dbDir, { recursive: true, force: true }); 61 | }); 62 | 63 | 
const dbPath = join(dbDir, "index.duckdb"); 64 | const lockPath = join(dbDir, "security.lock"); 65 | 66 | await runIndexer({ repoRoot: repo.path, databasePath: dbPath, full: true }); 67 | 68 | const runtime = await createServerRuntime({ 69 | repoRoot: repo.path, 70 | databasePath: dbPath, 71 | allowWriteLock: true, 72 | }); 73 | cleanupCallbacks.push(async () => { 74 | await runtime.close(); 75 | }); 76 | 77 | await access(lockPath); 78 | const storedHash = (await readFile(lockPath, "utf-8")).trim(); 79 | expect(storedHash).toBe(expectedHash); 80 | }); 81 | }); 82 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: ["main", "master", "develop"] 6 | pull_request: 7 | 8 | jobs: 9 | # ドキュメントID検証(ASSAY_KIT_TOKEN不要) 10 | docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 # shirushi lint --base で差分検出に必要 17 | 18 | - name: Set up Node.js 19 | uses: actions/setup-node@v4 20 | with: 21 | node-version: 20 22 | 23 | - name: Enable corepack 24 | run: corepack enable 25 | 26 | - name: Install dependencies 27 | run: pnpm install --no-frozen-lockfile 28 | 29 | - name: Lint documentation IDs 30 | run: pnpm docs:lint 31 | 32 | build: 33 | runs-on: ubuntu-latest 34 | env: 35 | HAS_ASSAY_KIT_TOKEN: ${{ secrets.ASSAY_KIT_TOKEN != '' }} 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v4 39 | 40 | - name: Check assay-kit token availability 41 | if: env.HAS_ASSAY_KIT_TOKEN != 'true' 42 | run: | 43 | echo "::warning::ASSAY_KIT_TOKEN is not configured. Skipping CI steps that require the private external/assay-kit submodule." 
44 | 
45 |       - name: Configure submodule auth
46 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
47 |         env:
48 |           TOKEN: ${{ secrets.ASSAY_KIT_TOKEN }}
49 |         run: |
50 |           git config --global url."https://x-access-token:${TOKEN}@github.com/".insteadOf git@github.com:
51 |           git config --global --add url."https://x-access-token:${TOKEN}@github.com/".insteadOf https://github.com/
52 |           git config submodule.external/assay-kit.url https://x-access-token:${TOKEN}@github.com/CAPHTECH/assay-kit.git
53 | 
54 |       - name: Fetch assay-kit submodule
55 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
56 |         run: git submodule update --init --depth 1 external/assay-kit
57 | 
58 |       - name: Set up Node.js
59 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
60 |         uses: actions/setup-node@v4
61 |         with:
62 |           node-version: 20
63 | 
64 |       - name: Enable corepack
65 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
66 |         run: corepack enable
67 | 
68 |       - name: Install assay-kit dependencies
69 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
70 |         run: |
71 |           cd external/assay-kit
72 |           pnpm install --no-frozen-lockfile
73 | 
74 |       - name: Install dependencies
75 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
76 |         run: pnpm install --no-frozen-lockfile
77 | 
78 |       - name: Run lint
79 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
80 |         run: pnpm run lint
81 | 
82 |       - name: Run type check
83 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
84 |         run: pnpm exec tsc --noEmit
85 |         continue-on-error: true
86 | 
87 |       - name: Run build
88 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
89 |         run: pnpm run build
90 |         continue-on-error: true
91 | 
92 |       - name: Run tests
93 |         if: env.HAS_ASSAY_KIT_TOKEN == 'true'
94 |         run: pnpm run test
95 | 
--------------------------------------------------------------------------------
/src/indexer/migrations/repo-merger.ts:
--------------------------------------------------------------------------------
1 | import { DuckDBClient } from "../../shared/duckdb.js";
2 | 
3 | /**
4 |  * Merges duplicate repository records by migrating all dependent rows to a canonical ID
5 |  * before deleting the legacy records.
6 |  *
7 |  * This function ensures data integrity when consolidating multiple repo records that
8 |  * represent the same repository. It performs the following steps in a transaction:
9 |  *
10 |  * 1. Dynamically discovers all tables with a repo_id column
11 |  * 2. Updates repo_id in all dependent tables to point to the canonical record
12 |  * 3. Deletes the legacy repo records
13 |  *
14 |  * **CRITICAL**: This function must be used instead of direct DELETE to prevent orphaned
15 |  * records in dependent tables (file, symbol, snippet, dependency, etc.). The schema
16 |  * does not define foreign key constraints with ON DELETE CASCADE, so dependent rows
17 |  * will NOT be automatically cleaned up.
18 |  *
19 |  * @param db - DuckDB client instance
20 |  * @param canonicalRepoId - The repository ID to keep (all others will merge into this)
21 |  * @param legacyRepoIds - Array of repository IDs to merge and delete
22 |  *
23 |  * @throws Error if migration fails (transaction will rollback automatically)
24 |  *
25 |  * @example
26 |  * ```typescript
27 |  * // Merge repos with IDs 2 and 3 into repo 1
28 |  * await mergeRepoRecords(db, 1, [2, 3]);
29 |  * // After: All file/symbol/snippet records now reference repo_id=1
30 |  * // Repos 2 and 3 are deleted
31 |  * ```
32 |  */
33 | export async function mergeRepoRecords(
34 |   db: DuckDBClient,
35 |   canonicalRepoId: number,
36 |   legacyRepoIds: number[]
37 | ): Promise<void> {
38 |   if (legacyRepoIds.length === 0) {
39 |     return;
40 |   }
41 | 
42 |   // Validate that canonical repo exists
43 |   const canonicalRepo = await db.all<{ id: number }>("SELECT id FROM repo WHERE id = ?", [
44 |     canonicalRepoId,
45 |   ]);
46 |   if (canonicalRepo.length === 0) {
47 |     throw new Error(`Canonical repo ID ${canonicalRepoId} does not exist`);
48 |   }
49 | 
50 |   // Dynamically discover all tables that reference repo_id
51 |   // This ensures we catch any new tables added in the future
52 |   const referencingTables = await db.all<{ table_name: string }>(
53 |     `SELECT DISTINCT c.table_name
54 |      FROM duckdb_columns() AS c
55 |      WHERE c.column_name = 'repo_id'
56 |        AND c.table_name <> 'repo'`
57 |   );
58 | 
59 |   // Filter to alphanumeric table names for SQL injection safety
60 |   const safeTables = referencingTables
61 |     .map((row) => row.table_name)
62 |     .filter((name) => /^[A-Za-z0-9_]+$/.test(name));
63 | 
64 |   // Perform migration in a transaction to ensure atomicity
65 |   await db.transaction(async () => {
66 |     for (const legacyRepoId of legacyRepoIds) {
67 |       // Migrate all dependent rows to the canonical repo_id
68 |       for (const tableName of safeTables) {
69 |         await db.run(`UPDATE ${tableName} SET repo_id = ?
WHERE repo_id = ?`, [
70 |           canonicalRepoId,
71 |           legacyRepoId,
72 |         ]);
73 |       }
74 | 
75 |       // Safe to delete now that all dependent rows have been migrated
76 |       await db.run("DELETE FROM repo WHERE id = ?", [legacyRepoId]);
77 |     }
78 |   });
79 | }
80 | 
--------------------------------------------------------------------------------
/tests/client/proxy.spec.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Tests for proxy client with automatic daemon restart on version mismatch
3 |  * and --full index mode
4 |  */
5 | 
6 | import * as fs from "fs/promises";
7 | import * as os from "os";
8 | import * as path from "path";
9 | 
10 | import { afterEach, beforeEach, describe, expect, it } from "vitest";
11 | 
12 | import { stopDaemon, isDaemonRunning } from "../../src/client/start-daemon.js";
13 | 
14 | // proxy.ts cannot be imported directly, so we exercise start-daemon's stopDaemon
15 | // to verify the foundation for automatic restart
16 | 
17 | describe("Proxy Daemon Restart", () => {
18 |   let tmpDir: string;
19 |   let databasePath: string;
20 | 
21 |   beforeEach(async () => {
22 |     tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "kiri-proxy-test-"));
23 |     databasePath = path.join(tmpDir, "test.duckdb");
24 |   });
25 | 
26 |   afterEach(async () => {
27 |     // Clean up
28 |     try {
29 |       await fs.rm(tmpDir, { recursive: true, force: true });
30 |       // eslint-disable-next-line @typescript-eslint/no-unused-vars
31 |     } catch (_err) {
32 |       // Ignore cleanup errors
33 |     }
34 |   });
35 | 
36 |   describe("stopDaemon used by proxy for version mismatch recovery", () => {
37 |     it("can stop daemon with stale PID file (simulating version mismatch scenario)", async () => {
38 |       const pidFilePath = `${databasePath}.daemon.pid`;
39 |       const startupLockPath = `${databasePath}.daemon.starting`;
40 | 
41 |       // Simulation: a daemon from an older version left its files behind
42 |       const stalePid = 999999;
43 |       await fs.writeFile(pidFilePath, String(stalePid), "utf-8");
44 |       await fs.writeFile(startupLockPath, String(stalePid), "utf-8");
45 | 
46 |       // When stopDaemon is called, it cleans up the stale PID files
47 |       await stopDaemon(databasePath);
48 | 
49 |       // Verify the files have been removed
50 |       await expect(fs.access(pidFilePath)).rejects.toThrow();
51 |       await expect(fs.access(startupLockPath)).rejects.toThrow();
52 |     });
53 | 
54 |     it("handles missing PID file gracefully (already cleaned up)", async () => {
55 |       // Verify no error is raised even when the PID file does not exist
56 |       await expect(stopDaemon(databasePath)).resolves.not.toThrow();
57 |     });
58 |   });
59 | 
60 |   describe("--full mode prerequisites", () => {
61 |     it("isDaemonRunning returns false when no daemon is running", async () => {
62 |       // Returns false when no daemon is running
63 |       const running = await isDaemonRunning(databasePath);
64 |       expect(running).toBe(false);
65 |     });
66 | 
67 |     it("isDaemonRunning returns false with stale PID file", async () => {
68 |       const pidFilePath = `${databasePath}.daemon.pid`;
69 |       // Write a PID that does not exist
70 |       const stalePid = 999999;
71 |       await fs.writeFile(pidFilePath, String(stalePid), "utf-8");
72 | 
73 |       // Returns false even with a stale PID file (the process does not exist)
74 |       const running = await isDaemonRunning(databasePath);
75 |       expect(running).toBe(false);
76 |     });
77 | 
78 |     it("stopDaemon followed by isDaemonRunning returns false", async () => {
79 |       const pidFilePath = `${databasePath}.daemon.pid`;
80 |       const stalePid = 999999;
81 |       await fs.writeFile(pidFilePath, String(stalePid), "utf-8");
82 | 
83 |       // Clean up with stopDaemon
84 |       await stopDaemon(databasePath);
85 | 
86 |       // isDaemonRunning returns false
87 |       const running = await isDaemonRunning(databasePath);
88 |       expect(running).toBe(false);
89 | 
}); 90 | }); 91 | }); 92 | -------------------------------------------------------------------------------- /docs/formal/language-support/PlanC_CapabilityComposition.als: -------------------------------------------------------------------------------- 1 | -- Plan C: Capability-based Composition 2 | -- Structural model with explicit capability composition 3 | 4 | module PlanC_CapabilityComposition 5 | 6 | -- Core signatures 7 | abstract sig Language {} 8 | one sig TypeScript, Swift, PHP, Java, Dart, Rust extends Language {} 9 | 10 | abstract sig Capability {} 11 | one sig SymbolExtraction, TypeInference, DocComment, SignatureFormat extends Capability {} 12 | 13 | sig CapabilityProvider { 14 | provides: set Capability, 15 | language: one Language 16 | } 17 | 18 | sig ComposedAnalyzer { 19 | language: one Language, 20 | providers: set CapabilityProvider, 21 | capabilities: set Capability 22 | } 23 | 24 | sig File { 25 | lang: one Language, 26 | lockedBy: lone ComposedAnalyzer 27 | } 28 | 29 | -- Facts 30 | 31 | -- Composed analyzer's capabilities come from its providers 32 | fact CapabilitiesFromProviders { 33 | all a: ComposedAnalyzer | 34 | a.capabilities = { c: Capability | some p: a.providers | c in p.provides } 35 | } 36 | 37 | -- Providers must match analyzer's language 38 | fact ProviderLanguageMatch { 39 | all a: ComposedAnalyzer, p: a.providers | p.language = a.language 40 | } 41 | 42 | -- Every analyzer must have SymbolExtraction capability (minimum requirement) 43 | fact MinimumCapability { 44 | all a: ComposedAnalyzer | SymbolExtraction in a.capabilities 45 | } 46 | 47 | -- File lock requires matching language 48 | fact FileLockLanguageMatch { 49 | all f: File, a: ComposedAnalyzer | f.lockedBy = a implies a.language = f.lang 50 | } 51 | 52 | -- Mutual exclusion: a file locked by at most one analyzer 53 | fact FileLockMutualExclusion { 54 | all f: File | lone f.lockedBy 55 | } 56 | 57 | -- No duplicate providers for same capability 58 | fact NoDuplicateProviders { 59 | all a: ComposedAnalyzer, c: Capability | 60 | lone { p: a.providers | c in p.provides } 61 | } 62 | 63 | -- Assertions 64 | 65 | -- Every analyzer has at least symbol extraction 66 | assert MinimumCapabilityGuaranteed { 67 | all a: ComposedAnalyzer | SymbolExtraction in a.capabilities 68 | } 69 | 70 | -- Capabilities are traceable to providers 71 | assert CapabilityTraceability { 72 | all a: ComposedAnalyzer, c: a.capabilities | 73 | some p: a.providers | c in p.provides 74 | } 75 | 76 | -- File lock consistency 77 | assert FileLockConsistent { 78 | all f: File | some f.lockedBy implies f.lockedBy.language = f.lang 79 | } 80 | 81 | -- Capability completeness: if provider is included, all its caps are available 82 | assert CapabilityCompleteness { 83 | all a: ComposedAnalyzer, p: a.providers, c: p.provides | 84 | c in a.capabilities 85 | } 86 | 87 | -- No orphan providers 88 | assert NoOrphanProviders { 89 | all p: CapabilityProvider | some a: ComposedAnalyzer | p in a.providers 90 | } 91 | 92 | -- Run commands 93 | run showComposedAnalyzer { 94 | some a: ComposedAnalyzer | #a.capabilities >= 3 95 | } for 5 96 | 97 | run showMultipleAnalyzers { 98 | #ComposedAnalyzer > 1 99 | all a: ComposedAnalyzer | #a.providers > 0 100 | } for 6 101 | 102 | check MinimumCapabilityGuaranteed for 10 103 | check CapabilityTraceability for 10 104 | check FileLockConsistent for 10 105 | check CapabilityCompleteness for 10 106 | check NoOrphanProviders for 8 107 | 
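To make the facts above concrete, here is a minimal TypeScript sketch of capability composition. All names are illustrative (this is not the repo's actual analyzer API); it enforces the ProviderLanguageMatch, NoDuplicateProviders, and MinimumCapability constraints at construction time.

```typescript
// Hypothetical runtime mirror of the Alloy model; a compose-time failure here
// corresponds to an unsatisfiable instance of the model above.
type Capability = "SymbolExtraction" | "TypeInference" | "DocComment" | "SignatureFormat";

interface CapabilityProvider {
  readonly language: string;
  readonly provides: ReadonlySet<Capability>;
}

interface ComposedAnalyzer {
  readonly language: string;
  readonly providers: readonly CapabilityProvider[];
  readonly capabilities: ReadonlySet<Capability>;
}

function composeAnalyzer(language: string, providers: CapabilityProvider[]): ComposedAnalyzer {
  // ProviderLanguageMatch: every provider must match the analyzer's language.
  if (providers.some((p) => p.language !== language)) {
    throw new Error("provider language mismatch");
  }
  // NoDuplicateProviders + CapabilitiesFromProviders: capabilities are the
  // union of provider capabilities, with at most one provider per capability.
  const capabilities = new Set<Capability>();
  for (const p of providers) {
    for (const c of p.provides) {
      if (capabilities.has(c)) throw new Error(`duplicate provider for ${c}`);
      capabilities.add(c);
    }
  }
  // MinimumCapability: SymbolExtraction is mandatory.
  if (!capabilities.has("SymbolExtraction")) {
    throw new Error("analyzer must provide SymbolExtraction");
  }
  return { language, providers, capabilities };
}
```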
--------------------------------------------------------------------------------
/scripts/datasets/fix-expected-section.ts:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env tsx
2 | /**
3 |  * Fix kiri-large-100.yaml expected section
4 |  *
5 |  * Moves metadata.expected to the correct expected section at the end
6 |  */
7 | 
8 | import { readFileSync, writeFileSync } from "node:fs";
9 | 
10 | import { parse, stringify } from "yaml";
11 | 
12 | interface Query {
13 |   id: string;
14 |   text: string;
15 |   goal: string;
16 |   metadata: {
17 |     category: string;
18 |     expected?: string[];
19 |   };
20 | }
21 | 
22 | interface Dataset {
23 |   schemaVersion: string;
24 |   name: string;
25 |   datasetId: string;
26 |   description: string;
27 |   version: string;
28 |   defaultParams: {
29 |     k: number;
30 |     timeoutMs: number;
31 |   };
32 |   queries: Query[];
33 |   expected?: Array<{
34 |     id: string;
35 |     reference: {
36 |       paths: string[];
37 |     };
38 |   }>;
39 | }
40 | 
41 | async function main(): Promise<void> {
42 |   const inputPath = "datasets/kiri-large-100.yaml";
43 |   const outputPath = "datasets/kiri-large-100-fixed.yaml";
44 | 
45 |   console.log(`📖 Reading ${inputPath}...`);
46 |   const content = readFileSync(inputPath, "utf-8");
47 |   const dataset = parse(content) as Dataset;
48 | 
49 |   console.log(`📝 Processing ${dataset.queries.length} queries...`);
50 | 
51 |   // Extract expected from metadata and build expected section
52 |   const expected: Array<{
53 |     id: string;
54 |     reference: {
55 |       paths: string[];
56 |     };
57 |   }> = [];
58 | 
59 |   const queriesWithoutMetadataExpected = dataset.queries.map((query) => {
60 |     const { expected: metadataExpected, ...restMetadata } = query.metadata;
61 | 
62 |     if (metadataExpected && metadataExpected.length > 0) {
63 |       expected.push({
64 |         id: query.id,
65 |         reference: {
66 |           paths: metadataExpected,
67 |         },
68 |       });
69 |     }
70 | 
71 |     return {
72 |       ...query,
73 |       metadata: restMetadata,
74 |     };
75 |   });
76 | 
77 |   // Build new dataset structure
78 |   const fixedDataset: Dataset = {
79 |     ...dataset,
80 |     queries: queriesWithoutMetadataExpected,
81 |     expected,
82 |   };
83 | 
84 |   console.log(`✅ Generated expected section with ${expected.length} entries`);
85 | 
86 |   // Write output
87 |   const output = stringify(fixedDataset, {
88 |     indent: 2,
89 |     lineWidth: 120,
90 |   });
91 | 
92 |   writeFileSync(outputPath, output, "utf-8");
93 |   console.log(`💾 Saved to ${outputPath}`);
94 | 
95 |   // Stats
96 |   console.log(`\n📊 Stats:`);
97 |   console.log(`  Total queries: ${dataset.queries.length}`);
98 |   console.log(`  With expected: ${expected.length}`);
99 |   console.log(`  Without expected: ${dataset.queries.length - expected.length}`);
100 | 
101 |   // Category breakdown
102 |   const categoryCount: Record<string, number> = {};
103 |   dataset.queries.forEach((q) => {
104 |     const cat = q.metadata.category;
105 |     categoryCount[cat] = (categoryCount[cat] || 0) + 1;
106 |   });
107 | 
108 |   console.log(`\n📂 Category breakdown:`);
109 |   Object.entries(categoryCount)
110 |     .sort((a, b) => b[1] - a[1])
111 |     .forEach(([category, count]) => {
112 |       console.log(`  ${category}: ${count}`);
113 |     });
114 | }
115 | 
116 | main().catch((error) => {
117 |   console.error("❌ Error:", error);
118 |   process.exit(1);
119 | });
120 | 
--------------------------------------------------------------------------------
/tests/helpers/migration-setup.ts:
--------------------------------------------------------------------------------
1 | import {
2 |   ensureBaseSchema,
3 |   ensureDocumentMetadataTables,
4 |   ensureRepoMetaColumns,
5 | } from "../../src/indexer/schema.js";
6 | import { DuckDBClient } from "../../src/shared/duckdb.js";
7 | 
8 | import { createTempDbPath } from "./db-setup.js";
9 | import {
10 |   buildInsertStatement,
11 |   createDocumentMetadataRecord,
12 |   createFileRecord,
13 | } from "./fixtures.js";
14 | import { createTempRepo } from "./test-repo.js";
15 | 
16 | /**
17 |  * Options for creating migration test scenarios
18 |  */
19 | export interface MigrationScenarioOptions {
20 |   /** Whether to include document_metadata tables (simulates migrated state) */
21 |   withMetadata?: boolean;
22 |   /** Content for the test repository */
23 |   repoContent?: Record<string, string>;
24 | }
25 | 
26 | /**
27 |  * Temporary repository handle
28 |  */
29 | export interface TempRepo {
30 |   path: string;
31 |   cleanup: () => Promise<void>;
32 | }
33 | 
34 | /**
35 |  * Temporary database handle
36 |  */
37 | export interface TempDb {
38 |   path: string;
39 |   cleanup: () => Promise<void>;
40 | }
41 | 
42 | /**
43 |  * Create a migration test scenario with pre-configured database state
44 |  *
45 |  * @param options Configuration options for the test scenario
46 |  * @returns Object with repo, db, and repoId
47 |  */
48 | export async function createMigrationTestScenario(options: MigrationScenarioOptions = {}): Promise<{
49 |   repo: TempRepo;
50 |   db: TempDb;
51 |   repoId: number;
52 | }> {
53 |   const repoContent = options.repoContent ?? {
54 |     "docs/README.md": "---\ntitle: Test\n---\n# Hello\n",
55 |   };
56 | 
57 |   const repo = await createTempRepo(repoContent);
58 |   const db = await createTempDbPath();
59 | 
60 |   const dbClient = await DuckDBClient.connect({ databasePath: db.path });
61 | 
62 |   try {
63 |     // Create base schema (always needed)
64 |     await ensureBaseSchema(dbClient);
65 |     await ensureRepoMetaColumns(dbClient);
66 | 
67 |     // Optionally create document_metadata tables (simulates migrated state)
68 |     if (options.withMetadata) {
69 |       await ensureDocumentMetadataTables(dbClient);
70 |     }
71 | 
72 |     // Insert a file record to simulate existing indexed data
73 |     await dbClient.run(`INSERT INTO repo (root) VALUES (?)`, [repo.path]);
74 |     const repoResult = await dbClient.all<{ id: number }>(`SELECT id FROM repo WHERE root = ?`, [
75 |       repo.path,
76 |     ]);
77 |     const repoId = repoResult[0]?.id;
78 | 
79 |     if (!repoId) {
80 |       throw new Error("Failed to create repo record");
81 |     }
82 | 
83 |     // Use fixture factory to create file record (maintains consistency with schema changes)
84 |     const fileRecord = createFileRecord(repoId);
85 |     const fileInsert = buildInsertStatement("file", fileRecord);
86 |     await dbClient.run(fileInsert.sql, fileInsert.values);
87 | 
88 |     // If metadata tables exist, populate them to simulate migrated state
89 |     if (options.withMetadata) {
90 |       const metadataRecord = createDocumentMetadataRecord(repoId);
91 |       const metadataInsert = buildInsertStatement("document_metadata", metadataRecord);
92 |       await dbClient.run(metadataInsert.sql, metadataInsert.values);
93 |     }
94 | 
95 |     return { repo, db, repoId };
96 |   } finally {
97 |     await dbClient.close();
98 |   }
99 | }
100 | 
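For orientation, a minimal sketch of how a spec might consume this helper (the import path and assertions are illustrative, not taken from an actual test file):

```typescript
// Hypothetical vitest usage of createMigrationTestScenario.
import { describe, expect, it } from "vitest";

import { createMigrationTestScenario } from "../helpers/migration-setup.js";

describe("document metadata migration", () => {
  it("starts from a pre-migration state", async () => {
    // withMetadata: false simulates a database that has not been migrated yet.
    const { repo, db, repoId } = await createMigrationTestScenario({ withMetadata: false });
    try {
      expect(repoId).toBeGreaterThan(0);
      // ... run the migration under test against db.path here ...
    } finally {
      await db.cleanup();
      await repo.cleanup();
    }
  });
});
```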
--------------------------------------------------------------------------------
/src/shared/security/config.ts:
--------------------------------------------------------------------------------
1 | import { createHash } from "node:crypto";
2 | import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
3 | import { dirname, join, resolve } from "node:path";
4 | import { fileURLToPath } from "node:url";
5 | 
6 | import { z } from "zod";
7 | 
8 | import { parseSimpleYaml } from "../utils/simpleYaml.js";
9 | 
10 | export interface SecurityConfig {
11 |   allowed_paths: string[];
12 |   allow_network_egress: boolean;
13 |   allow_subprocess: boolean;
14 |   sensitive_tokens: string[];
15 | }
16 | 
17 | export interface SecurityStatus {
18 |   config: SecurityConfig;
19 |   configPath: string;
20 |   lockPath: string;
21 |   hash: string;
22 |   lockHash: string | null;
23 |   matches: boolean;
24 | }
25 | 
26 | /**
27 |  * Schema definition for the security configuration (type-safe validation via Zod)
28 |  */
29 | const SecurityConfigSchema = z.object({
30 |   allowed_paths: z.array(z.string()).min(1, "At least one allowed path required"),
31 |   allow_network_egress: z.boolean(),
32 |   allow_subprocess: z.boolean(),
33 |   sensitive_tokens: z.array(z.string()),
34 | });
35 | 
36 | export function loadSecurityConfig(configPath?: string): { config: SecurityConfig; hash: string } {
37 |   const path =
38 |     configPath ?? join(fileURLToPath(import.meta.url), "../../../../config/security.yml");
39 |   const content = readFileSync(path, "utf8");
40 |   const parsed = parseSimpleYaml(content);
41 | 
42 |   // Schema validation via Zod (replaces the previous manual assertions)
43 |   const result = SecurityConfigSchema.safeParse(parsed);
44 |   if (!result.success) {
45 |     const errors = result.error.issues.map((i) => i.message).join(", ");
46 |     throw new Error(`Security configuration is invalid. Fix the following errors: ${errors}`);
47 |   }
48 | 
49 |   const hash = createHash("sha256").update(content).digest("hex");
50 |   return { config: result.data, hash };
51 | }
52 | 
53 | export function readSecurityLock(lockPath?: string): string | null {
54 |   try {
55 |     return readFileSync(resolve(lockPath ?? "var/security.lock"), "utf8").trim();
56 |   } catch {
57 |     return null;
58 |   }
59 | }
60 | 
61 | export function evaluateSecurityStatus(configPath?: string, lockPath?: string): SecurityStatus {
62 |   const { config, hash } = loadSecurityConfig(configPath);
63 |   const stored = readSecurityLock(lockPath);
64 |   const defaultConfigPath = join(fileURLToPath(import.meta.url), "../../../../config/security.yml");
65 |   return {
66 |     config,
67 |     configPath: configPath ?? defaultConfigPath,
68 |     lockPath: resolve(lockPath ?? "var/security.lock"),
69 |     hash,
70 |     lockHash: stored,
71 |     matches: stored === null ? false : stored === hash,
72 |   };
73 | }
74 | 
75 | export function assertSecurityBaseline(configPath?: string, lockPath?: string): SecurityStatus {
76 |   const status = evaluateSecurityStatus(configPath, lockPath);
77 |   if (!status.lockHash) {
78 |     throw new Error(
79 |       `Security lock is missing at ${status.lockPath}. Establish baseline by running 'pnpm exec tsx src/client/cli.ts security verify --write-lock'.`
80 |     );
81 |   }
82 |   if (!status.matches) {
83 |     throw new Error(
84 |       `Security configuration at ${status.configPath} does not match lock hash. Review configuration changes before proceeding.`
85 |     );
86 |   }
87 |   return status;
88 | }
89 | 
90 | export function updateSecurityLock(hash: string, lockPath?: string): string {
91 |   const path = resolve(lockPath ?? "var/security.lock");
92 |   mkdirSync(dirname(path), { recursive: true });
93 |   writeFileSync(path, `${hash}\n`, "utf8");
94 |   return path;
95 | }
96 | 
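A minimal sketch of the baseline lifecycle these functions implement (the import path is illustrative): compute the config hash, persist it once, then assert against it on later startups.

```typescript
// Sketch only: first-run lock creation followed by a startup check.
import { assertSecurityBaseline, loadSecurityConfig, updateSecurityLock } from "./config.js";

const { hash } = loadSecurityConfig();      // parse + Zod-validate config/security.yml
const lockPath = updateSecurityLock(hash);  // write var/security.lock (first run only)

// On subsequent startups this throws if config/security.yml drifted from the lock.
const status = assertSecurityBaseline(undefined, lockPath);
console.info(`security baseline OK (${status.hash.slice(0, 12)}...)`);
```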
"var/security.lock"); 92 | mkdirSync(dirname(path), { recursive: true }); 93 | writeFileSync(path, `${hash}\n`, "utf8"); 94 | return path; 95 | } 96 | -------------------------------------------------------------------------------- /src/indexer/git.ts: -------------------------------------------------------------------------------- 1 | import { execFile } from "node:child_process"; 2 | import { promisify } from "node:util"; 3 | 4 | const execFileAsync = promisify(execFile); 5 | 6 | const GIT_LS_ARGS = ["ls-files", "-z"] as const; 7 | const GIT_LS_ARGS_WITH_SUBMODULES = ["ls-files", "--recurse-submodules", "-z"] as const; 8 | 9 | let warnedAboutRecurseFallback = false; 10 | 11 | function parseGitPaths(output: string): string[] { 12 | return output 13 | .split("\0") 14 | .map((item) => item.trim()) 15 | .filter((item) => item.length > 0); 16 | } 17 | 18 | function shouldFallbackWithoutRecurse(error: unknown): boolean { 19 | if (!error || typeof error !== "object") { 20 | return false; 21 | } 22 | const err = error as NodeJS.ErrnoException & { stderr?: string }; 23 | if (typeof err.code === "number" && err.code === 129) { 24 | // git returns exit code 129 for unknown options 25 | return true; 26 | } 27 | if (typeof err.code === "string" && Number.parseInt(err.code, 10) === 129) { 28 | // git returns exit code 129 for unknown options 29 | return true; 30 | } 31 | const stderr = err.stderr ?? ""; 32 | return stderr.includes("unknown option") || stderr.includes("does not support"); 33 | } 34 | 35 | export async function gitLsFiles(repoRoot: string): Promise { 36 | try { 37 | const { stdout } = await execFileAsync("git", [...GIT_LS_ARGS_WITH_SUBMODULES], { 38 | cwd: repoRoot, 39 | }); 40 | return parseGitPaths(stdout); 41 | } catch (error) { 42 | if (shouldFallbackWithoutRecurse(error)) { 43 | if (!warnedAboutRecurseFallback) { 44 | console.warn( 45 | "git ls-files does not support --recurse-submodules on this system. " + 46 | "Falling back to superproject-only scan; submodule files will be skipped." 
47 |         );
48 |         warnedAboutRecurseFallback = true;
49 |       }
50 |       const { stdout } = await execFileAsync("git", [...GIT_LS_ARGS], { cwd: repoRoot });
51 |       return parseGitPaths(stdout);
52 |     }
53 |     throw error;
54 |   }
55 | }
56 | 
57 | /**
58 |  * Fetch untracked files (those not excluded by .gitignore).
59 |  * Used so that new files added in watch mode are not mistakenly
60 |  * removed by reconcileDeletedFiles.
61 |  */
62 | export async function gitLsFilesUntracked(repoRoot: string): Promise<string[]> {
63 |   const { stdout } = await execFileAsync(
64 |     "git",
65 |     ["ls-files", "--others", "--exclude-standard", "-z"],
66 |     { cwd: repoRoot }
67 |   );
68 |   return parseGitPaths(stdout);
69 | }
70 | 
71 | export async function getHeadCommit(repoRoot: string): Promise<string> {
72 |   const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], { cwd: repoRoot });
73 |   return stdout.trim();
74 | }
75 | 
76 | export async function getDefaultBranch(repoRoot: string): Promise<string | null> {
77 |   try {
78 |     const { stdout } = await execFileAsync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
79 |       cwd: repoRoot,
80 |     });
81 |     const branch = stdout.trim();
82 |     if (branch === "HEAD" || branch.length === 0) {
83 |       return null;
84 |     }
85 |     return branch;
86 |   } catch {
87 |     return null;
88 |   }
89 | }
90 | 
91 | export async function gitDiffNameOnly(repoRoot: string, sinceRef: string): Promise<string[]> {
92 |   const args = ["diff", "--name-only", "-z", "--diff-filter=ACDMRTUXB", sinceRef, "HEAD"];
93 |   const { stdout } = await execFileAsync("git", args, { cwd: repoRoot });
94 |   return stdout
95 |     .split("\0")
96 |     .map((item) => item.trim())
97 |     .filter((item) => item.length > 0);
98 | }
99 | 
--------------------------------------------------------------------------------
/docs/formal/language-support/PlanB_HierarchicalBackend.als:
--------------------------------------------------------------------------------
1 | -- Plan B: Hierarchical Backend + Pool Integration
2 | -- Structural model with resource pooling for LSP clients
3 | 
4 | module PlanB_HierarchicalBackend
5 | 
6 | -- Core signatures
7 | abstract sig Language {}
8 | one sig TypeScript, Swift, PHP, Java, Dart, Rust extends Language {}
9 | 
10 | abstract sig BackendType {}
11 | one sig TreeSitter, CompilerAPI, LSP extends BackendType {}
12 | 
13 | abstract sig PoolState {}
14 | one sig Available, InUse, Initializing, Failed extends PoolState {}
15 | 
16 | sig Client {
17 |   state: one PoolState,
18 |   language: one Language
19 | }
20 | 
21 | sig Pool {
22 |   clients: set Client,
23 |   maxSize: one Int,
24 |   language: one Language
25 | }
26 | 
27 | sig Backend {
28 |   backendType: one BackendType,
29 |   pool: lone Pool
30 | }
31 | 
32 | sig Analyzer {
33 |   language: one Language,
34 |   backend: one Backend
35 | }
36 | 
37 | sig File {
38 |   lang: one Language,
39 |   lockedBy: lone Client
40 | }
41 | 
42 | -- Facts
43 | 
44 | -- Pool size constraint
45 | fact PoolSizePositive {
46 |   all p: Pool | p.maxSize > 0 and p.maxSize <= 5
47 | }
48 | 
49 | -- Clients in pool match pool's language
50 | fact PoolClientLanguage {
51 |   all p: Pool, c: p.clients | c.language = p.language
52 | }
53 | 
54 | -- Pool size limit
55 | fact PoolSizeLimit {
56 |   all p: Pool | #p.clients <= p.maxSize
57 | }
58 | 
59 | -- LSP backends must have a pool
60 | fact LSPHasPool {
61 |   all b: Backend | b.backendType = LSP implies some b.pool
62 | }
63 | 
64 | -- Non-LSP backends don't have pools
65 | fact NonLSPNoPool {
66 |   all b: Backend | b.backendType != LSP implies no b.pool
67 | }
68 | 
69 | -- A file can only be locked by an InUse client
70 | fact FileLockRequiresInUse {
71 |   all f: File | 
some f.lockedBy implies f.lockedBy.state = InUse 72 | } 73 | 74 | -- Mutual exclusion: a file locked by at most one client 75 | fact FileLockMutualExclusion { 76 | all f: File | lone f.lockedBy 77 | } 78 | 79 | -- Client language must match file language for locking 80 | fact LockLanguageMatch { 81 | all f: File, c: Client | f.lockedBy = c implies c.language = f.lang 82 | } 83 | 84 | -- Every client belongs to exactly one pool 85 | fact ClientBelongsToPool { 86 | all c: Client | one p: Pool | c in p.clients 87 | } 88 | 89 | -- Assertions 90 | 91 | -- Pool never exceeds max size 92 | assert PoolNeverOverflows { 93 | all p: Pool | #p.clients <= p.maxSize 94 | } 95 | 96 | -- Available clients don't hold locks 97 | assert AvailableClientsNoLocks { 98 | all c: Client | c.state = Available implies (all f: File | f.lockedBy != c) 99 | } 100 | 101 | -- File lock consistency 102 | assert FileLockConsistent { 103 | all f: File | some f.lockedBy implies 104 | (f.lockedBy.state = InUse and f.lockedBy.language = f.lang) 105 | } 106 | 107 | -- Concurrent processing: different language files can be processed concurrently 108 | assert ConcurrentProcessing { 109 | all disj f1, f2: File, disj c1, c2: Client | 110 | (f1.lockedBy = c1 and f2.lockedBy = c2 and f1.lang != f2.lang) implies c1 != c2 111 | } 112 | 113 | -- Run commands 114 | run showPooledSystem { 115 | some p: Pool | #p.clients >= 2 116 | some f: File | some f.lockedBy 117 | } for 6 118 | 119 | run showMultiplePools { 120 | #Pool > 1 121 | all p: Pool | #p.clients > 0 122 | } for 8 123 | 124 | check PoolNeverOverflows for 10 125 | check AvailableClientsNoLocks for 10 126 | check FileLockConsistent for 10 127 | check ConcurrentProcessing for 8 128 | -------------------------------------------------------------------------------- /src/server/abbreviations.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Abbreviation expansion for path matching (ADR 003) 3 | * 4 | * Bridges the vocabulary gap between natural language queries and technical path names. 
5 | * Example: "database" → ["database", "db", "data"] 6 | * 7 | * LDE principles: 8 | * - Pure functions (no side effects) 9 | * - Readonly types (immutability) 10 | * - Property-based testing 11 | */ 12 | 13 | /** 14 | * Abbreviation mapping (LDE: readonly, immutable) 15 | */ 16 | export interface AbbreviationMap { 17 | readonly canonical: string; // Standard full form (e.g., "database") 18 | readonly variants: readonly string[]; // Common abbreviations (e.g., ["db", "data"]) 19 | } 20 | 21 | /** 22 | * Default abbreviation dictionary 23 | * 24 | * Maintenance: Add new entries as needed, alphabetically sorted by canonical 25 | * Conflicts: Use most common meaning (e.g., "db" → "database" not "debug") 26 | */ 27 | export const DEFAULT_ABBREVIATIONS: readonly AbbreviationMap[] = [ 28 | { canonical: "administrator", variants: ["admin"] }, 29 | { canonical: "application", variants: ["app"] }, 30 | { canonical: "authentication", variants: ["auth"] }, 31 | { canonical: "authorization", variants: ["authz"] }, 32 | { canonical: "configuration", variants: ["config", "cfg", "conf"] }, 33 | { canonical: "controller", variants: ["ctrl"] }, 34 | { canonical: "database", variants: ["db", "data"] }, 35 | { canonical: "development", variants: ["dev"] }, 36 | { canonical: "directory", variants: ["dir"] }, 37 | { canonical: "document", variants: ["doc", "docs"] }, 38 | { canonical: "error", variants: ["err", "errors"] }, 39 | { canonical: "implementation", variants: ["impl"] }, 40 | { canonical: "manager", variants: ["mgr"] }, 41 | { canonical: "production", variants: ["prod"] }, 42 | { canonical: "repository", variants: ["repo"] }, 43 | { canonical: "service", variants: ["svc", "srv"] }, 44 | { canonical: "source", variants: ["src"] }, 45 | { canonical: "specification", variants: ["spec", "specs"] }, 46 | { canonical: "temporary", variants: ["tmp", "temp"] }, 47 | { canonical: "utilities", variants: ["util", "utils"] }, 48 | ]; 49 | 50 | /** 51 | * Expand term with common abbreviations (LDE: pure function) 52 | * 53 | * Matches term (case-insensitive) against abbreviation dictionary and returns 54 | * all related forms (canonical + variants). 
55 |  *
56 |  * @param term - Query term to expand (e.g., "db", "database", "config")
57 |  * @param abbreviations - Abbreviation dictionary (default: DEFAULT_ABBREVIATIONS)
58 |  * @returns Readonly array of expanded terms including canonical and all variants
59 |  *
60 |  * @example
61 |  * expandAbbreviations("db") → ["database", "db", "data"]
62 |  * expandAbbreviations("config") → ["configuration", "config", "cfg", "conf"]
63 |  * expandAbbreviations("unknown") → ["unknown"]
64 |  *
65 |  * LDE properties:
66 |  * - Pure: Same input always produces same output
67 |  * - No side effects: Doesn't modify input or global state
68 |  * - Immutable: Returns readonly array
69 |  */
70 | export function expandAbbreviations(
71 |   term: string,
72 |   abbreviations: readonly AbbreviationMap[] = DEFAULT_ABBREVIATIONS
73 | ): readonly string[] {
74 |   const normalized = term.toLowerCase().trim();
75 | 
76 |   // Find matching abbreviation map (check both canonical and variants)
77 |   const map = abbreviations.find(
78 |     (m) => m.canonical === normalized || m.variants.includes(normalized)
79 |   );
80 | 
81 |   if (map) {
82 |     // Return all forms: canonical + variants
83 |     return [map.canonical, ...map.variants];
84 |   }
85 | 
86 |   // No match: return original term
87 |   return [term];
88 | }
89 | 
--------------------------------------------------------------------------------
/src/shared/utils/simpleYaml.ts:
--------------------------------------------------------------------------------
1 | export interface SimpleYamlObject {
2 |   [key: string]: SimpleYamlValue;
3 | }
4 | 
5 | export type SimpleYamlValue =
6 |   | string
7 |   | number
8 |   | boolean
9 |   | null
10 |   | SimpleYamlValue[]
11 |   | SimpleYamlObject;
12 | 
13 | function parseScalar(value: string): SimpleYamlValue {
14 |   let trimmed = value.trim();
15 |   const commentIndex = trimmed.indexOf(" #");
16 |   if (commentIndex >= 0) {
17 |     trimmed = trimmed.slice(0, commentIndex).trim();
18 |   }
19 |   if (trimmed === "true") return true;
20 |   if (trimmed === "false") return false;
21 |   if (trimmed === "null") return null;
22 |   if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
23 |     return Number(trimmed);
24 |   }
25 |   if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
26 |     return trimmed.slice(1, -1);
27 |   }
28 |   if (trimmed.startsWith("'") && trimmed.endsWith("'")) {
29 |     return trimmed.slice(1, -1);
30 |   }
31 |   return trimmed;
32 | }
33 | 
34 | interface StackEntry {
35 |   indent: number;
36 |   value: SimpleYamlValue;
37 | }
38 | 
39 | function ensureObject(value: SimpleYamlValue): Record<string, SimpleYamlValue> {
40 |   if (!value || typeof value !== "object" || Array.isArray(value)) {
41 |     throw new Error("Expected mapping");
42 |   }
43 |   return value as Record<string, SimpleYamlValue>;
44 | }
45 | 
46 | export function parseSimpleYaml(content: string): Record<string, SimpleYamlValue> {
47 |   const root: Record<string, SimpleYamlValue> = {};
48 |   const lines = content.split(/\r?\n/);
49 |   const stack: StackEntry[] = [{ indent: -1, value: root }];
50 | 
51 |   for (let index = 0; index < lines.length; index++) {
52 |     const rawLine = lines[index];
53 |     if (!rawLine || /^\s*$/.test(rawLine) || /^\s*#/.test(rawLine)) {
54 |       continue;
55 |     }
56 |     const indentMatch = rawLine.match(/^\s*/);
57 |     const indent = indentMatch?.[0]?.length ?? 0;
58 |     const line = rawLine.trim();
59 | 
60 |     while (stack.length > 0) {
61 |       const last = stack[stack.length - 1];
62 |       if (!last || indent <= last.indent) {
63 |         stack.pop();
64 |       } else {
65 |         break;
66 |       }
67 |     }
68 |     const parent = stack[stack.length - 1];
69 |     if (!parent) {
70 |       throw new Error("Invalid YAML structure: no parent context");
71 |     }
72 |     const container = parent.value;
73 | 
74 |     if (line.startsWith("- ")) {
75 |       if (!Array.isArray(container)) {
76 |         throw new Error("List item without array context");
77 |       }
78 |       (container as SimpleYamlValue[]).push(parseScalar(line.slice(2)));
79 |       continue;
80 |     }
81 | 
82 |     const separatorIndex = line.indexOf(":");
83 |     if (separatorIndex === -1) {
84 |       throw new Error(`Invalid YAML line: ${line}`);
85 |     }
86 |     const key = line.slice(0, separatorIndex).trim();
87 |     const remainder = line.slice(separatorIndex + 1);
88 |     const target = ensureObject(container);
89 |     if (remainder.trim().length === 0) {
90 |       const nextLine = lines[index + 1];
91 |       const isList = nextLine ? nextLine.trim().startsWith("- ") : false;
92 |       if (isList) {
93 |         const arr: SimpleYamlValue[] = [];
94 |         target[key] = arr;
95 |         stack.push({ indent, value: arr });
96 |       } else {
97 |         const obj: Record<string, SimpleYamlValue> = {};
98 |         target[key] = obj;
99 |         stack.push({ indent, value: obj });
100 |       }
101 |     } else {
102 |       target[key] = parseScalar(remainder);
103 |     }
104 |   }
105 | 
106 |   return root;
107 | }
108 | 
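To illustrate the subset this parser supports (nested mappings, scalar lists, quoted scalars, and trailing comments), a small round-trip; the input here is made up for the example:

```typescript
import { parseSimpleYaml } from "./simpleYaml.js";

const parsed = parseSimpleYaml(
  [
    "allow_network_egress: false # comment is stripped",
    "allowed_paths:",
    '  - "src"',
    "  - tmp",
    "limits:",
    "  max: 5",
  ].join("\n")
);
// parsed === {
//   allow_network_egress: false,
//   allowed_paths: ["src", "tmp"],
//   limits: { max: 5 },
// }
```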
--------------------------------------------------------------------------------
/scripts/diag/query-terms.ts:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env tsx
2 | import { readFile } from "node:fs/promises";
3 | import process from "node:process";
4 | 
5 | import yaml from "yaml";
6 | 
7 | import { tokenizeText } from "../../src/shared/tokenizer.js";
8 | 
9 | interface CliOptions {
10 |   datasetPath: string;
11 |   repoFilter?: string;
12 |   idFilter?: string;
13 | }
14 | 
15 | interface GoldenQuery {
16 |   id: string;
17 |   query?: string;
18 |   goal?: string;
19 |   repo?: string;
20 |   expected?: {
21 |     paths?: string[];
22 |   };
23 |   hints?: string[];
24 | }
25 | 
26 | interface GoldenDataset {
27 |   queries: GoldenQuery[];
28 | }
29 | 
30 | function parseArgs(): CliOptions {
31 |   const options: CliOptions = {
32 |     datasetPath: "tests/eval/goldens/queries.yaml",
33 |   };
34 |   for (let i = 2; i < process.argv.length; i += 1) {
35 |     const arg = process.argv[i];
36 |     if (arg === "--dataset") {
37 |       options.datasetPath = process.argv[i + 1] ?? options.datasetPath;
38 |       i += 1;
39 |     } else if (arg === "--repo") {
40 |       const value = process.argv[i + 1];
41 |       if (value !== undefined) options.repoFilter = value;
42 |       i += 1;
43 |     } else if (arg === "--id") {
44 |       const value = process.argv[i + 1];
45 |       if (value !== undefined) options.idFilter = value;
46 |       i += 1;
47 |     }
48 |   }
49 |   return options;
50 | }
51 | 
52 | function unique(values: string[]): string[] {
53 |   return Array.from(new Set(values));
54 | }
55 | 
56 | function extractPathSegments(paths: string[] | undefined): string[] {
57 |   if (!paths || paths.length === 0) {
58 |     return [];
59 |   }
60 |   const segments: string[] = [];
61 |   for (const path of paths) {
62 |     const parts = path
63 |       .split(/[/.]/)
64 |       .map((part) => part.trim())
65 |       .filter((part) => part.length >= 3);
66 |     segments.push(...parts);
67 |   }
68 |   return unique(segments);
69 | }
70 | 
71 | function logQueryInfo(query: GoldenQuery): void {
72 |   const text = query.query ?? query.goal ?? "";
73 |   const keywords = unique(tokenizeText(text));
74 |   const pathSegments = extractPathSegments(query.expected?.paths);
75 |   const hints = unique(query.hints ?? []);
76 | 
77 |   console.log(`\n=== ${query.id} (repo: ${query.repo ?? "default"}) ===`);
78 |   console.log(`Query Text: ${text}`);
79 |   console.log(`Keywords (${keywords.length}): ${keywords.join(", ") || "(none)"}`);
80 |   console.log(`Path segments (${pathSegments.length}): ${pathSegments.join(", ") || "(none)"}`);
81 |   console.log(`Hints (${hints.length}): ${hints.join(", ") || "(none)"}`);
82 | }
83 | 
84 | async function main(): Promise<void> {
85 |   const options = parseArgs();
86 |   const raw = await readFile(options.datasetPath, "utf8");
87 |   const dataset = yaml.parse(raw) as GoldenDataset;
88 |   if (!dataset?.queries) {
89 |     throw new Error(`Dataset at ${options.datasetPath} has no queries`);
90 |   }
91 | 
92 |   const filtered = dataset.queries.filter((query) => {
93 |     if (options.idFilter && query.id !== options.idFilter) {
94 |       return false;
95 |     }
96 |     if (options.repoFilter && query.repo !== options.repoFilter) {
97 |       return false;
98 |     }
99 |     return true;
100 |   });
101 | 
102 |   if (filtered.length === 0) {
103 |     console.warn("No queries matched the provided filters.");
104 |     return;
105 |   }
106 | 
107 |   for (const query of filtered) {
108 |     logQueryInfo(query);
109 |   }
110 | }
111 | 
112 | main().catch((error) => {
113 |   console.error(error instanceof Error ? error.message : String(error));
114 |   process.exit(1);
115 | });
116 | 
""; 73 | const keywords = unique(tokenizeText(text)); 74 | const pathSegments = extractPathSegments(query.expected?.paths); 75 | const hints = unique(query.hints ?? []); 76 | 77 | console.log(`\n=== ${query.id} (repo: ${query.repo ?? "default"}) ===`); 78 | console.log(`Query Text: ${text}`); 79 | console.log(`Keywords (${keywords.length}): ${keywords.join(", ") || "(none)"}`); 80 | console.log(`Path segments (${pathSegments.length}): ${pathSegments.join(", ") || "(none)"}`); 81 | console.log(`Hints (${hints.length}): ${hints.join(", ") || "(none)"}`); 82 | } 83 | 84 | async function main(): Promise { 85 | const options = parseArgs(); 86 | const raw = await readFile(options.datasetPath, "utf8"); 87 | const dataset = yaml.parse(raw) as GoldenDataset; 88 | if (!dataset?.queries) { 89 | throw new Error(`Dataset at ${options.datasetPath} has no queries`); 90 | } 91 | 92 | const filtered = dataset.queries.filter((query) => { 93 | if (options.idFilter && query.id !== options.idFilter) { 94 | return false; 95 | } 96 | if (options.repoFilter && query.repo !== options.repoFilter) { 97 | return false; 98 | } 99 | return true; 100 | }); 101 | 102 | if (filtered.length === 0) { 103 | console.warn("No queries matched the provided filters."); 104 | return; 105 | } 106 | 107 | for (const query of filtered) { 108 | logQueryInfo(query); 109 | } 110 | } 111 | 112 | main().catch((error) => { 113 | console.error(error instanceof Error ? error.message : String(error)); 114 | process.exit(1); 115 | }); 116 | -------------------------------------------------------------------------------- /docs/formal/adaptive-k.md: -------------------------------------------------------------------------------- 1 | # Adaptive K (Issue 78) 形式仕様と検証 2 | 3 | ## 1. 形式化候補一覧 4 | 5 | - 適応Kマッピング関数 `getAdaptiveK` / High / TLA+ / カテゴリ→K決定の安全性と境界値保証。 6 | - 未知カテゴリフォールバック / Medium / TLA+ / デフォルト挙動を明示し漏れを防ぐ。 7 | - K値の外部上書き(環境変数・設定)/ Low / Alloy(未着手)/ 設定組合せで既定セットを壊すリスク検知。 8 | 9 | ## 2. 選定対象と不変条件 10 | 11 | - 対象: 適応Kマッピング + 未知カテゴリフォールバック + 許容集合/範囲チェック 12 | - InvAllowedSet: k ∈ ALLOWED_SET 13 | - InvRange: K_MIN ≤ k ≤ K_MAX 14 | - InvBugfixPrecision: bugfix → K_BUGFIX 15 | - InvIntegrationPrecision: integration → K_INTEGRATION 16 | - InvTestfailRecall: testfail → K_TESTFAIL 17 | - InvPerformanceRecall: performance → K_PERFORMANCE 18 | - InvGenericBalance: その他 → K_DEFAULT(未知カテゴリ含む) 19 | 20 | ## 3. 形式仕様(TLA+) 21 | 22 | - 本体: `docs/formal/AdaptiveK.tla` 23 | - CFG: 本番 `docs/formal/AdaptiveK-prod.cfg`(既定セット {5,10,20})、実験例 `docs/formal/AdaptiveK-exp.cfg`(既定セット {5,10,15,20} かつデフォルト15)。 24 | - 抜粋: 25 | 26 | ```tla 27 | CONSTANTS CATEGORIES, ALLOWED_SET, K_MIN, K_MAX, 28 | K_BUGFIX, K_INTEGRATION, K_TESTFAIL, K_PERFORMANCE, K_DEFAULT 29 | AdaptiveK(cat) == 30 | IF cat = "bugfix" THEN K_BUGFIX 31 | ELSE IF cat = "integration" THEN K_INTEGRATION 32 | ELSE IF cat = "testfail" THEN K_TESTFAIL 33 | ELSE IF cat = "performance" THEN K_PERFORMANCE 34 | ELSE K_DEFAULT 35 | ``` 36 | 37 | - 不変条件: `InvAllowedSet`, `InvRange`, 各カテゴリのK固定、汎用フォールバックを `Spec == Init /\ [][Next]_<>` 上で証明。 38 | 39 | ### 設定・フラグ(実装方針メモ) 40 | 41 | - フラグ: `KIRI_ADAPTIVE_K_ENABLED`(env)、falseで常に `kWhenDisabled` を返す。 42 | - 許容集合: `KIRI_ADAPTIVE_K_ALLOWED_SET`(カンマ区切り)。 43 | - 範囲: `KIRI_ADAPTIVE_K_MIN` / `KIRI_ADAPTIVE_K_MAX`。 44 | - カテゴリ別K: `KIRI_ADAPTIVE_K_BUGFIX` / `KIRI_ADAPTIVE_K_INTEGRATION` / `KIRI_ADAPTIVE_K_TESTFAIL` / `KIRI_ADAPTIVE_K_PERFORMANCE`。 45 | - デフォルト: `KIRI_ADAPTIVE_K_DEFAULT`、フラグOFF固定値 `KIRI_ADAPTIVE_K_DISABLED_VALUE`。 46 | 47 | ## 4. 
47 | ## 4. Verification Results
48 | 
49 | - Production profile: `java -cp /tmp/tla2tools.jar tlc2.TLC -deadlock -config docs/formal/AdaptiveK-prod.cfg docs/formal/AdaptiveK.tla`
50 |   - 42 states / 6 distinct / depth 1; no deadlock; all invariants hold.
51 | - Experimental profile: `java -cp /tmp/tla2tools.jar tlc2.TLC -deadlock -config docs/formal/AdaptiveK-exp.cfg docs/formal/AdaptiveK.tla`
52 |   - 42 states / 6 distinct / depth 1; no deadlock; all invariants hold.
53 | - Recommended in CI: `pnpm run check:adaptive-k` (runs config loading + validation).
54 | 
55 | ## 5. Spec → Implementation Mapping
56 | 
57 | | Spec element | Implementation site (intended) | Notes |
58 | | --------------------------------- | ----------------------------------------- | ------------------------------------------------- |
59 | | `AdaptiveK(cat)` | `src/shared/adaptive-k.ts:getAdaptiveK` | Category→K decision logic. |
60 | | `CATEGORIES` | category type union in `src/types/query.ts` | Must stay in sync with the query metadata definition. |
61 | | `ALLOWED_SET` / `K_MIN` / `K_MAX` | config loading + validation layer | Allowed K set and range checked at startup. |
62 | | `K_*` constants | config or a constant map | Default K per category; unknown categories get `K_DEFAULT`. |
63 | | `kWhenDisabled` (implementation) | config loading layer | Fixed K returned when the flag is off. |
64 | | `InvAllowedSet` / `InvRange` | validation + telemetry | Detect immediately at runtime when K leaves the set or range. |
65 | | `Init/Next` | pre-search step that recomputes K per request | Decided from the latest category on every request. |
66 | 
67 | ## 6. Remaining Issues and Risks
68 | 
69 | - Misclassification by the category classifier is not formalized. Its impact needs evaluation and monitoring.
70 | - Consistency when ALLOWED_SET / K_* are changed via config or environment variables (config precedence, conflicts) is unverified. To be modeled separately in Alloy/TLA+.
71 | - Risk of `CATEGORIES` drifting from the type union and config when categories are added. Recommend single-source generation or a CI check.
72 | 
73 | ## 7. Assumptions and Open Questions
74 | 
75 | - Assumption: unknown categories always fall back to `K_DEFAULT` (production default 10).
76 | - Assumption: ALLOWED_SET is fixed by config at startup and is not changed dynamically at runtime.
77 | - Open question: the operational rationale (latency, UI budget) for the upper and lower bounds of ALLOWED_SET is undocumented. Once decided, K_MIN/K_MAX must be updated and re-verified.
78 | 
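As a closing aid, a hedged sketch of the startup validation that InvAllowedSet / InvRange call for (names here are illustrative; the real check runs behind `pnpm run check:adaptive-k`):

```typescript
// Sketch only: reject any per-category K outside the allowed set or range.
export function assertAdaptiveKConfig(opts: {
  allowedSet: ReadonlySet<number>;
  kMin: number;
  kMax: number;
  ks: Record<string, number>; // per-category K values, including the default
}): void {
  for (const [name, k] of Object.entries(opts.ks)) {
    if (!opts.allowedSet.has(k)) {
      throw new Error(`K for ${name} (${k}) is outside ALLOWED_SET`); // InvAllowedSet
    }
    if (k < opts.kMin || k > opts.kMax) {
      throw new Error(`K for ${name} (${k}) violates [K_MIN, K_MAX]`); // InvRange
    }
  }
}
```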
--------------------------------------------------------------------------------
/scripts/add-schema-hints.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Add hint_dictionary entries for schema-related terms
3 |  * Maps "schema" keyword to schema file paths for better search relevance
4 |  */
5 | import { DuckDBClient } from "../src/shared/duckdb.js";
6 | 
7 | async function main() {
8 |   const dbPath = process.argv[2] || "external/assay-kit/.kiri/index.duckdb";
9 | 
10 |   const db = await DuckDBClient.connect({
11 |     databasePath: dbPath,
12 |     ensureDirectory: false,
13 |   });
14 | 
15 |   try {
16 |     // Get repo_id for assay-kit
17 |     const repoRows = await db.all<{ id: number }>(`SELECT id FROM repo LIMIT 1`);
18 |     if (repoRows.length === 0) {
19 |       throw new Error("No repository found in database");
20 |     }
21 |     const repoId = repoRows[0]!.id;
22 |     console.log(`Using repo_id=${repoId}`);
23 | 
24 |     // Schema file paths to add as hint targets
25 |     const schemaHints = [
26 |       {
27 |         hint: "schema",
28 |         path: "packages/assay-kit/src/dataset/schemas/dataset.schema.ts",
29 |         freq: 10,
30 |       },
31 |       { hint: "schema", path: "packages/assay-kit/src/dataset/schemas/query.schema.ts", freq: 8 },
32 |       { hint: "schema", path: "packages/assay-kit/src/dataset/schemas/metrics.schema.ts", freq: 6 },
33 |       {
34 |         hint: "validation",
35 |         path: "packages/assay-kit/src/dataset/schemas/dataset.schema.ts",
36 |         freq: 8,
37 |       },
38 |       { hint: "validation", path: "packages/assay-kit/src/cli/commands/validate.ts", freq: 6 },
39 |       { hint: "dataset", path: "packages/assay-kit/src/dataset/loader.ts", freq: 10 },
40 |       {
41 |         hint: "dataset",
42 |         path: "packages/assay-kit/src/dataset/schemas/dataset.schema.ts",
43 |         freq: 8,
44 |       },
45 |       { hint: "loader", path: "packages/assay-kit/src/dataset/loader.ts", freq: 10 },
46 |     ];
47 | 
48 |     // Insert hint_dictionary entries
49 |     for (const entry of schemaHints) {
50 |       // Check if entry exists
51 |       const existing = await db.all<{ cnt: number }>(
52 |         `SELECT COUNT(*) as cnt FROM hint_dictionary
53 |          WHERE repo_id = ? AND hint_value = ? AND target_path = ?`,
54 |         [repoId, entry.hint, entry.path]
55 |       );
56 | 
57 |       if (existing[0]?.cnt === 0) {
58 |         await db.run(
59 |           `INSERT INTO hint_dictionary (repo_id, hint_value, target_path, freq)
60 |            VALUES (?, ?, ?, ?)`,
61 |           [repoId, entry.hint, entry.path, entry.freq]
62 |         );
63 |         console.log(`Added: "${entry.hint}" → ${entry.path} (freq=${entry.freq})`);
64 |       } else {
65 |         await db.run(
66 |           `UPDATE hint_dictionary SET freq = ?
67 |            WHERE repo_id = ? AND hint_value = ? AND target_path = ?`,
68 |           [entry.freq, repoId, entry.hint, entry.path]
69 |         );
70 |         console.log(`Updated: "${entry.hint}" → ${entry.path} (freq=${entry.freq})`);
71 |       }
72 |     }
73 | 
74 |     // Verify insertion
75 |     const count = await db.all<{ cnt: number }>(
76 |       `SELECT COUNT(*) as cnt FROM hint_dictionary WHERE repo_id = ?`,
77 |       [repoId]
78 |     );
79 |     console.log(`\nTotal hint_dictionary entries for repo_id=${repoId}: ${count[0]?.cnt}`);
80 | 
81 |     // Show all entries
82 |     const entries = await db.all<{ hint_value: string; target_path: string; freq: number }>(
83 |       `SELECT hint_value, target_path, freq FROM hint_dictionary WHERE repo_id = ? ORDER BY hint_value, freq DESC`,
84 |       [repoId]
85 |     );
86 |     console.log("\nAll hint_dictionary entries:");
87 |     for (const e of entries) {
88 |       console.log(`  "${e.hint_value}" → ${e.target_path} (freq=${e.freq})`);
89 |     }
90 |   } finally {
91 |     await db.close();
92 |   }
93 | }
94 | 
95 | main().catch(console.error);
96 | 
--------------------------------------------------------------------------------
/scripts/assay/kiri-variants.ts:
--------------------------------------------------------------------------------
1 | import { KiriSearchAdapter, type KiriAdapterConfig } from "./kiri-adapter.js";
2 | 
3 | export interface KiriVariantConfig extends KiriAdapterConfig {
4 |   name: string;
5 |   description: string;
6 |   port?: number;
7 | }
8 | 
9 | export const KIRI_VARIANTS: Record<string, KiriVariantConfig> = {
10 |   default: {
11 |     name: "default",
12 |     description: "Default KIRI configuration (Phase 1 compatible)",
13 |     limit: 5,
14 |     compact: true,
15 |   },
16 |   balanced: {
17 |     name: "balanced",
18 |     description: "Balanced boost with higher limit",
19 |     limit: 15,
20 |     compact: false,
21 |     boostProfile: "balanced",
22 |     port: 20099,
23 |   },
24 |   docs: {
25 |     name: "docs",
26 |     description: "Documentation-focused profile",
27 |     limit: 10,
28 |     compact: true,
29 |     boostProfile: "docs",
30 |     port: 20199,
31 |   },
32 |   noBoost: {
33 |     name: "noBoost",
34 |     description: "Boost profile disabled (baseline)",
35 |     limit: 10,
36 |     compact: true,
37 |     boostProfile: "none",
38 |     port: 20299,
39 |   },
40 |   feature: {
41 |     name: "feature",
42 |     description: "Feature development profile",
43 |     limit: 6,
44 |     compact: true,
45 |     boostProfile: "feature",
46 |     port: 20399,
47 |   },
48 |   bugfix: {
49 |     name: "bugfix",
50 |     description: "Bug fix profile",
51 |     limit: 6,
52 |     compact: true,
53 |     boostProfile: "bugfix",
54 |     port: 20499,
55 |   },
56 |   debug: {
57 |     name: "debug",
58 |     description: "Debug profile",
59 |     limit: 6,
60 |     compact: true,
61 |     boostProfile: "debug",
62 |     port: 20599,
63 |   },
64 |   api: {
65 |     name: "api",
66 |     description: "API development profile",
67 |     limit: 6,
68 |     compact: true,
69 |     boostProfile: "api",
70 |     port: 20699,
71 |   },
72 |   editor: {
73 |     name: "editor",
74 |     description: "Editor integration profile",
75 |     limit: 6,
76 |     compact: true,
77 |     boostProfile: 
"editor", 78 | port: 20799, 79 | }, 80 | testfail: { 81 | name: "testfail", 82 | description: "Test failure diagnosis profile", 83 | limit: 6, 84 | compact: true, 85 | boostProfile: "testfail", 86 | port: 20899, 87 | }, 88 | typeerror: { 89 | name: "typeerror", 90 | description: "Type error debugging profile", 91 | limit: 6, 92 | compact: true, 93 | boostProfile: "typeerror", 94 | port: 20999, 95 | }, 96 | }; 97 | 98 | export function getVariantConfig(name: string): KiriVariantConfig { 99 | const config = KIRI_VARIANTS[name]; 100 | if (!config) { 101 | throw new Error( 102 | `Unknown variant: ${name}. Available: ${Object.keys(KIRI_VARIANTS).join(", ")}` 103 | ); 104 | } 105 | return config; 106 | } 107 | 108 | export function getAvailableVariants(): string[] { 109 | return Object.keys(KIRI_VARIANTS); 110 | } 111 | 112 | export function createKiriAdapter( 113 | variantName: string, 114 | databasePath: string, 115 | repoRoot: string, 116 | kiriServerPath?: string 117 | ): KiriSearchAdapter { 118 | const config = getVariantConfig(variantName); 119 | const adapterConfig: KiriAdapterConfig = {}; 120 | if (config.limit !== undefined) { 121 | adapterConfig.limit = config.limit; 122 | } 123 | if (config.compact !== undefined) { 124 | adapterConfig.compact = config.compact; 125 | } 126 | if (config.boostProfile !== undefined) { 127 | adapterConfig.boostProfile = config.boostProfile; 128 | } 129 | 130 | return new KiriSearchAdapter( 131 | databasePath, 132 | repoRoot, 133 | kiriServerPath, 134 | config.port ?? 19999, 135 | adapterConfig 136 | ); 137 | } 138 | -------------------------------------------------------------------------------- /scripts/diag/dump-hints.ts: -------------------------------------------------------------------------------- 1 | import { writeFile } from "node:fs/promises"; 2 | import { resolve } from "node:path"; 3 | import process from "node:process"; 4 | 5 | import { DuckDBClient } from "../../src/shared/duckdb.js"; 6 | import { normalizeRepoPath } from "../../src/shared/utils/path.js"; 7 | 8 | interface DumpArgs { 9 | databasePath: string; 10 | outPath?: string; 11 | repoRoot?: string; 12 | limit: number; 13 | } 14 | 15 | function parseArgs(argv: string[]): DumpArgs { 16 | let databasePath: string | undefined; 17 | let outPath: string | undefined; 18 | let repoRoot: string | undefined; 19 | let limit = 5000; 20 | for (let i = 0; i < argv.length; i += 1) { 21 | const arg = argv[i]; 22 | if (arg === "--db") { 23 | databasePath = argv[++i]; 24 | } else if (arg === "--out") { 25 | outPath = argv[++i]; 26 | } else if (arg === "--repo") { 27 | repoRoot = argv[++i]; 28 | } else if (arg === "--limit") { 29 | const value = Number.parseInt(argv[++i] ?? 
"", 10); 30 | if (Number.isFinite(value) && value > 0) { 31 | limit = value; 32 | } 33 | } 34 | } 35 | if (!databasePath) { 36 | throw new Error("dump-hints requires --db argument"); 37 | } 38 | const result: DumpArgs = { databasePath, limit }; 39 | if (outPath !== undefined) { 40 | result.outPath = outPath; 41 | } 42 | if (repoRoot !== undefined) { 43 | result.repoRoot = repoRoot; 44 | } 45 | return result; 46 | } 47 | 48 | export async function main(argv = process.argv.slice(2)): Promise { 49 | const args = parseArgs(argv); 50 | const db = await DuckDBClient.connect({ 51 | databasePath: args.databasePath, 52 | ensureDirectory: false, 53 | }); 54 | try { 55 | let repoId: number | null = null; 56 | if (args.repoRoot) { 57 | const normalizedRepoRoot = normalizeRepoPath(resolve(args.repoRoot)); 58 | const repoRows = await db.all<{ id: number }>( 59 | `SELECT id FROM repo WHERE root = ? OR normalized_root = ? LIMIT 1`, 60 | [normalizedRepoRoot, normalizedRepoRoot] 61 | ); 62 | if (repoRows.length === 0) { 63 | throw new Error(`Repository not found for root: ${normalizedRepoRoot}`); 64 | } 65 | repoId = repoRows[0]!.id; 66 | } 67 | 68 | const rows = await db.all<{ 69 | repo_id: number; 70 | root: string | null; 71 | hint_value: string; 72 | expansion_kind: string; 73 | target_path: string | null; 74 | payload: unknown; 75 | created_at: string; 76 | }>( 77 | ` 78 | SELECT he.repo_id, 79 | r.root, 80 | he.hint_value, 81 | he.expansion_kind, 82 | he.target_path, 83 | he.payload, 84 | he.created_at 85 | FROM hint_expansion he 86 | LEFT JOIN repo r ON r.id = he.repo_id 87 | WHERE (? IS NULL OR he.repo_id = ?) 88 | ORDER BY he.created_at DESC 89 | LIMIT ? 90 | `, 91 | [repoId, repoId, args.limit] 92 | ); 93 | 94 | const serialized = JSON.stringify(rows, null, 2); 95 | if (args.outPath) { 96 | await writeFile(args.outPath, `${serialized}\n`, "utf8"); 97 | console.info(`Hint expansion log written to ${args.outPath} (${rows.length} rows).`); 98 | } else { 99 | console.info(serialized); 100 | } 101 | } finally { 102 | await db.close(); 103 | } 104 | } 105 | 106 | const executedDirectly = 107 | typeof process.argv[1] === "string" && new URL(import.meta.url).pathname === process.argv[1]; 108 | 109 | if (executedDirectly) { 110 | main().catch((error) => { 111 | console.error("Failed to dump hint expansions:", error); 112 | process.exitCode = 1; 113 | }); 114 | } 115 | --------------------------------------------------------------------------------