├── .windsurfrules ├── npm ├── bin │ ├── .gitkeep │ ├── binaries │ │ ├── .gitkeep │ │ └── README.md │ ├── .gitignore │ └── README.md ├── src │ ├── agent │ │ ├── hooks │ │ │ └── index.js │ │ ├── storage │ │ │ ├── index.js │ │ │ ├── InMemoryStorageAdapter.js │ │ │ └── StorageAdapter.js │ │ ├── acp │ │ │ └── index.js │ │ ├── shared │ │ │ └── Session.js │ │ ├── mockProvider.js │ │ ├── mcp │ │ │ └── index.js │ │ └── engines │ │ │ ├── vercel.js │ │ │ └── enhanced-vercel.js │ ├── cli.js │ ├── tools │ │ ├── index.js │ │ └── langchain.js │ └── utils │ │ ├── symlink-utils.js │ │ └── path-validation.js ├── tsconfig.json ├── tests │ ├── unit │ │ ├── system-prompt.test.js │ │ ├── backtickAutoFix.test.js │ │ ├── types-probe-agent-options.test.js │ │ ├── extract-content.test.js │ │ ├── mermaidInfiniteLoopFix.test.js │ │ └── mermaidHtmlEntities.test.js │ ├── nestedQuoteFix.test.js │ ├── setup.js │ └── README.md ├── scripts │ ├── build-mcp.cjs │ └── build-agent.cjs ├── jest.config.js ├── test-grep.js ├── test-grep-simplified.js └── test-codex-e2e.js ├── examples ├── chat │ ├── simple-traces.jsonl │ ├── .gitignore │ ├── logo.png │ ├── bin │ │ └── probe-chat.js │ ├── .dockerignore │ ├── npm │ │ ├── LICENSE │ │ ├── .npmignore │ │ └── package.json │ ├── test-simple-tracing.js │ ├── test-image-spans.js │ ├── test-chat-tracing.js │ ├── test-direct-function.js │ ├── test-github-context.txt │ ├── implement │ │ ├── backends │ │ │ └── registry.js │ │ └── core │ │ │ └── timeouts.js │ ├── cancelRequest.js │ └── auth.js ├── reranker │ ├── src │ │ ├── lib.rs │ │ └── bin │ │ │ └── benchmark.rs │ ├── rust_bert_test │ │ ├── Cargo.toml │ │ └── README.md │ ├── requirements.txt │ ├── Cargo.toml │ ├── test_bert_results.sh │ ├── simple_test.py │ ├── MODELS.md │ ├── MODEL_COMPARISON.md │ ├── download_models.sh │ ├── test_parallel_performance.sh │ ├── test_cross_encoder.sh │ ├── DEBUG_OUTPUT_ANALYSIS.md │ └── test_all_models.sh └── cache_demo.rs ├── logo.png ├── test-api-key.sh ├── site ├── public │ ├── 
logo.png │ ├── moon.png │ ├── sun.png │ ├── saturn.png │ ├── logo_padded.png │ ├── _redirects │ ├── icons │ │ └── privacy-icon.svg │ └── _headers ├── .vitepress │ ├── components │ │ ├── FeatureSection.vue.d.ts │ │ ├── TestComponent.vue │ │ └── FullWidthFeatureSection.vue │ └── theme │ │ ├── components │ │ ├── BlogPostLayout.vue │ │ ├── FeatureSection.vue │ │ ├── CommandExample.vue │ │ ├── FeatureList.vue │ │ ├── CodeEditor.vue │ │ └── Feature.vue │ │ ├── index.js │ │ ├── blog.css │ │ └── home.css ├── blog.md ├── .env.example ├── wrangler.toml ├── contributing │ └── README.md ├── use-cases │ └── README.md ├── package.json └── DEPLOYMENT.md ├── src ├── mod.rs ├── language │ ├── common.rs │ ├── mod.rs │ ├── language_trait.rs │ ├── block_handling.rs │ ├── factory.rs │ ├── c.rs │ ├── cpp.rs │ ├── php.rs │ ├── java.rs │ └── ruby.rs ├── search │ ├── token_utils.rs │ ├── search_options.rs │ ├── mod.rs │ ├── timeout.rs │ ├── term_exceptions.rs │ ├── test_patterns.rs │ └── limits.rs ├── version.rs └── simd_test.rs ├── tests ├── fixtures │ └── user │ │ └── AssemblyInfo.cs ├── mocks │ ├── test_ip_whitelist.go │ ├── test_object.js │ └── test_struct.go ├── property_tests.proptest-regressions ├── common.rs ├── test_file.rs ├── test_tokenize.rs ├── schemas │ └── xml_output_schema.xsd ├── lib_usage.rs ├── outline_keyword_preservation_test.rs ├── extract_input_file_tests.rs └── nested_symbol_extraction_tests.rs ├── test-probe-implementation └── hello.js ├── codex-config └── config.toml ├── .githooks ├── pre-commit-vow └── post-commit ├── Cross.toml ├── .claude ├── settings.json └── commands │ └── performance-review.md ├── .cargo └── config.toml ├── test_data └── test_nested_struct.go ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── visor.yml │ ├── README-docker.md │ └── vitepress-gh-pages.yml.disabled ├── result1.txt ├── .dockerignore ├── result2.txt ├── Dockerfile ├── docker-compose.yml ├── scripts └── claude-hook-wrapper.sh ├── 
.prompts └── engineer.md ├── .gitignore ├── .roomodes └── SECURITY.md /.windsurfrules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /npm/bin/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/simple-traces.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | .env -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/logo.png -------------------------------------------------------------------------------- /test-api-key.sh: -------------------------------------------------------------------------------- 1 | export ANTHROPIC_API_KEY="your-actual-api-key-here" 2 | -------------------------------------------------------------------------------- /site/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/logo.png -------------------------------------------------------------------------------- /site/public/moon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/moon.png -------------------------------------------------------------------------------- /site/public/sun.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/buger/probe/HEAD/site/public/sun.png -------------------------------------------------------------------------------- /examples/chat/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/examples/chat/logo.png -------------------------------------------------------------------------------- /npm/src/agent/hooks/index.js: -------------------------------------------------------------------------------- 1 | export { HookManager, HOOK_TYPES } from './HookManager.js'; 2 | -------------------------------------------------------------------------------- /site/public/saturn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/saturn.png -------------------------------------------------------------------------------- /site/public/logo_padded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/logo_padded.png -------------------------------------------------------------------------------- /npm/bin/binaries/.gitkeep: -------------------------------------------------------------------------------- 1 | # This file ensures the binaries directory is tracked by git even when empty 2 | -------------------------------------------------------------------------------- /examples/reranker/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod reranker; 2 | pub mod benchmark; 3 | pub mod bert_simulator; 4 | pub mod parallel_reranker; -------------------------------------------------------------------------------- /src/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod models; 2 | pub mod language; 3 | pub mod search; 4 | pub mod cli; 5 | pub mod server; 6 | pub mod ranking; 7 | 
-------------------------------------------------------------------------------- /npm/bin/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all files in this directory 2 | * 3 | # Except these files 4 | !.gitignore 5 | !.gitkeep 6 | !README.md 7 | !probe 8 | -------------------------------------------------------------------------------- /tests/fixtures/user/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | 3 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] -------------------------------------------------------------------------------- /npm/src/agent/storage/index.js: -------------------------------------------------------------------------------- 1 | export { StorageAdapter } from './StorageAdapter.js'; 2 | export { InMemoryStorageAdapter } from './InMemoryStorageAdapter.js'; 3 | -------------------------------------------------------------------------------- /test-probe-implementation/hello.js: -------------------------------------------------------------------------------- 1 | // A simple hello world function 2 | function hello(name) { 3 | console.log("Hello, " + name); 4 | } 5 | 6 | // Test the function 7 | hello("World"); -------------------------------------------------------------------------------- /site/.vitepress/components/FeatureSection.vue.d.ts: -------------------------------------------------------------------------------- 1 | declare module '@theme/components/FeatureSection.vue' { 2 | import { DefineComponent } from 'vue' 3 | const component: DefineComponent<{}, {}, any> 4 | export default component 5 | } -------------------------------------------------------------------------------- /codex-config/config.toml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`. 
2 | [mcp_servers.code-search] 3 | command = "npx" 4 | args = ["-y", "@probelabs/probe", "mcp", "server"] 5 | # env = { "API_KEY" = "value" } 6 | 7 | -------------------------------------------------------------------------------- /site/blog.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Probe Blog 3 | description: Latest news, updates, and insights about Probe - the AI-friendly code search tool 4 | --- 5 | 6 | # Redirecting to Blog... 7 | 8 | Please visit the [Probe Blog](/blog/) for the latest news and updates. -------------------------------------------------------------------------------- /npm/src/agent/acp/index.js: -------------------------------------------------------------------------------- 1 | // ACP (Agent Client Protocol) module exports 2 | export { ACPServer } from './server.js'; 3 | export { ACPConnection } from './connection.js'; 4 | export { ACPToolCall, ACPToolManager } from './tools.js'; 5 | export * from './types.js'; -------------------------------------------------------------------------------- /.githooks/pre-commit-vow: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Vow - AI accountability gate 3 | # This hook runs at the end of the pre-commit pipeline 4 | 5 | npx @probelabs/vow check 6 | if [ $? -ne 0 ]; then 7 | echo "Vow check failed. Commit aborted." 8 | exit 1 9 | fi 10 | -------------------------------------------------------------------------------- /Cross.toml: -------------------------------------------------------------------------------- 1 | # Cross-compilation configuration for probe 2 | # This configures the cross tool for ARM64 Linux builds 3 | 4 | [build.env] 5 | passthrough = [ 6 | "CARGO_INCREMENTAL", 7 | "CARGO_TERM_COLOR", 8 | ] 9 | 10 | # Cross uses prebuilt images for musl targets; no per-target overrides needed. 
11 | -------------------------------------------------------------------------------- /examples/reranker/rust_bert_test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-bert-reranker-test" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rust-bert = "0.21" 8 | anyhow = "1.0" 9 | tokio = { version = "1.0", features = ["full"] } 10 | 11 | # For downloading model files 12 | tch = "0.13.0" -------------------------------------------------------------------------------- /site/.env.example: -------------------------------------------------------------------------------- 1 | # Environment variables for Cloudflare Pages deployment 2 | # Copy this to .env and configure as needed 3 | 4 | # Cloudflare configuration (optional) 5 | # CLOUDFLARE_ACCOUNT_ID=your-account-id 6 | # CLOUDFLARE_API_TOKEN=your-api-token 7 | 8 | # Build configuration 9 | NODE_VERSION=20 10 | NPM_VERSION=latest -------------------------------------------------------------------------------- /.claude/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "hooks": { 3 | "Stop": [ 4 | { 5 | "hooks": [ 6 | { 7 | "type": "command", 8 | "command": "$CLAUDE_PROJECT_DIR/scripts/claude-hook-wrapper.sh $CLAUDE_PROJECT_DIR/.githooks/pre-commit" 9 | } 10 | ] 11 | } 12 | ] 13 | } 14 | } -------------------------------------------------------------------------------- /examples/chat/bin/probe-chat.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * @probelabs/probe-chat CLI 5 | * Command-line interface for Probe code search chat 6 | * 7 | * This is a thin wrapper around the main functionality in index.js 8 | */ 9 | 10 | import { main } from '../index.js'; 11 | 12 | // Execute the main function 13 | main(); -------------------------------------------------------------------------------- 
/.githooks/post-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Post-commit hook to provide feedback after a successful commit 4 | # Installed by 'make install-hooks' 5 | 6 | # Colors for better output 7 | GREEN='\033[0;32m' 8 | NC='\033[0m' # No Color 9 | 10 | echo "${GREEN}Commit successful! All checks passed.${NC}" 11 | echo "${GREEN}Remember to push your changes.${NC}" 12 | -------------------------------------------------------------------------------- /tests/mocks/test_ip_whitelist.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | // IPWhiteListMiddleware is a middleware that checks if the client's IP is in the whitelist 4 | type IPWhiteListMiddleware struct { 5 | Whitelist []string 6 | } 7 | 8 | // Name returns the name of the middleware 9 | func (i *IPWhiteListMiddleware) Name() string { 10 | return "IPWhiteListMiddleware" 11 | } -------------------------------------------------------------------------------- /examples/reranker/requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for cross-encoder testing and debugging 2 | torch>=1.9.0 3 | transformers>=4.20.0 4 | sentence-transformers>=2.2.0 5 | numpy>=1.21.0 6 | 7 | # Optional but recommended for better performance 8 | tokenizers>=0.13.0 9 | 10 | # For additional debugging and analysis 11 | matplotlib>=3.5.0 12 | seaborn>=0.11.0 13 | pandas>=1.3.0 -------------------------------------------------------------------------------- /site/public/_redirects: -------------------------------------------------------------------------------- 1 | # Redirect rules for Cloudflare Pages 2 | 3 | # Redirect www to main domain 4 | https://www.probelabs.com/* https://probelabs.com/:splat 301 5 | 6 | # Handle clean URLs - VitePress already generates clean URLs 7 | # but this ensures fallback behavior 8 | /docs/* /404.html 404 9 | 
10 | # Fallback for any 404s to custom 404 page 11 | /* /404.html 404 -------------------------------------------------------------------------------- /site/wrangler.toml: -------------------------------------------------------------------------------- 1 | # Cloudflare Pages configuration for Probe documentation site 2 | name = "probe-docs" 3 | compatibility_date = "2024-01-15" 4 | 5 | # Pages project configuration 6 | pages_build_output_dir = ".vitepress/dist" 7 | 8 | # Note: Custom domains (probelabs.com) are configured via Cloudflare Dashboard 9 | # Routes configuration is not supported in wrangler.toml for Pages -------------------------------------------------------------------------------- /site/public/icons/privacy-icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /site/contributing/README.md: -------------------------------------------------------------------------------- 1 | # Contributing to Probe 2 | 3 | This directory contains guides for contributors to the Probe project, including: 4 | 5 | - Documentation Maintenance 6 | - Documentation Cross-References 7 | - Documentation Structure 8 | 9 | For general contribution guidelines, please see the [Contributing Guide](https://github.com/probelabs/probe/blob/main/CONTRIBUTING.md) in the main repository. 
-------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # Target-specific overrides for Windows MSVC 2 | [target.x86_64-pc-windows-msvc] 3 | rustflags = ["-C", "target-feature=+crt-static", "-C", "link-args=/DEBUG:NONE /NOLOGO"] 4 | 5 | [target.i686-pc-windows-msvc] 6 | rustflags = ["-C", "target-feature=+crt-static", "-C", "link-args=/DEBUG:NONE /NOLOGO"] 7 | 8 | # Configuration for Windows builds 9 | [target.'cfg(target_os = "windows")'] 10 | # General Windows configuration -------------------------------------------------------------------------------- /src/language/common.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use tree_sitter::Node; 3 | 4 | /// Helper function to collect all node types in the AST 5 | pub fn collect_node_types(node: Node, node_types: &mut HashSet) { 6 | node_types.insert(node.kind().to_string()); 7 | 8 | let mut cursor = node.walk(); 9 | for child in node.children(&mut cursor) { 10 | collect_node_types(child, node_types); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /site/use-cases/README.md: -------------------------------------------------------------------------------- 1 | # Use Cases & Workflows 2 | 3 | This directory contains scenario-based documentation for different Probe usage patterns. 
4 | 5 | - **Web Interface**: Code as a source of truth for product functionality 6 | - **AI Code Editors & MCP**: Integration with AI-powered code editors 7 | - **CLI for AI Workflows**: Using Probe in the command line for AI-assisted workflows 8 | - **Developers & SDK**: Building custom tools with the Node.js SDK -------------------------------------------------------------------------------- /npm/bin/README.md: -------------------------------------------------------------------------------- 1 | # Probe Binary Directory 2 | 3 | This directory is used to store the downloaded probe binary. 4 | 5 | The binary is automatically downloaded during package installation. 6 | If you encounter any issues with the download, you can manually place the probe binary in this directory. 7 | 8 | Binary name should be: 9 | - `probe` (on Linux/macOS) 10 | - `probe.exe` (on Windows) 11 | 12 | You can download the binary from: https://github.com/probelabs/probe/releases 13 | -------------------------------------------------------------------------------- /test_data/test_nested_struct.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | func HandleNotFound(c *gin.Context) { 10 | c.JSON(http.StatusNotFound, ErrorResponse{ 11 | Errors: []struct { 12 | Title string `json:"title"` 13 | Detail string `json:"detail"` 14 | }{{Title: "Not Found", Detail: "Model price not found"}}, 15 | }) 16 | } 17 | 18 | type ErrorResponse struct { 19 | Errors interface{} `json:"errors"` 20 | } 21 | 22 | func main() { 23 | r := gin.Default() 24 | r.NoRoute(HandleNotFound) 25 | r.Run() 26 | } 27 | -------------------------------------------------------------------------------- /src/search/token_utils.rs: -------------------------------------------------------------------------------- 1 | use std::sync::OnceLock; 2 | use tiktoken_rs::p50k_base; 3 | use tiktoken_rs::CoreBPE; 4 | 5 | 
/// Returns a reference to the tiktoken tokenizer 6 | pub fn get_tokenizer() -> &'static CoreBPE { 7 | static TOKENIZER: OnceLock = OnceLock::new(); 8 | TOKENIZER.get_or_init(|| p50k_base().unwrap()) 9 | } 10 | 11 | /// Helper function to count tokens in a string using tiktoken (same tokenizer as GPT models) 12 | pub fn count_tokens(text: &str) -> usize { 13 | let tokenizer = get_tokenizer(); 14 | tokenizer.encode_with_special_tokens(text).len() 15 | } 16 | -------------------------------------------------------------------------------- /examples/chat/.dockerignore: -------------------------------------------------------------------------------- 1 | # Node.js 2 | node_modules/ 3 | npm-debug.log* 4 | yarn-debug.log* 5 | yarn-error.log* 6 | .npm 7 | .yarn 8 | 9 | # Logs 10 | *.log 11 | logs/ 12 | *.tmp 13 | 14 | # Test files (we'll copy them explicitly if needed) 15 | test/ 16 | *.test.js 17 | test-*.js 18 | 19 | # Development files 20 | .gitignore 21 | .git/ 22 | .github/ 23 | .vscode/ 24 | .idea/ 25 | 26 | # Temporary files 27 | probe-debug.txt 28 | simple-traces.jsonl 29 | *.cache 30 | 31 | # Documentation (keep only essential ones) 32 | TRACING.md 33 | README.md 34 | 35 | # OS generated files 36 | .DS_Store 37 | Thumbs.db -------------------------------------------------------------------------------- /npm/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "moduleResolution": "node", 6 | "lib": ["ES2020"], 7 | "outDir": "./build", 8 | "rootDir": "./src", 9 | "strict": true, 10 | "esModuleInterop": true, 11 | "allowSyntheticDefaultImports": true, 12 | "skipLibCheck": true, 13 | "forceConsistentCasingInFileNames": true, 14 | "resolveJsonModule": true, 15 | "allowJs": true, 16 | "declaration": true, 17 | "declarationMap": true 18 | }, 19 | "include": ["src/**/*"], 20 | "exclude": ["node_modules", "build", "bin"] 21 | } 
-------------------------------------------------------------------------------- /site/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "probe-docs", 3 | "version": "1.0.0", 4 | "description": "Documentation for Probe - AI-friendly code search tool", 5 | "scripts": { 6 | "docs:dev": "vitepress dev", 7 | "docs:build": "vitepress build", 8 | "docs:preview": "vitepress preview", 9 | "dev": "vitepress dev", 10 | "build": "vitepress build", 11 | "preview": "vitepress preview" 12 | }, 13 | "dependencies": { 14 | "highlight.js": "^11.11.1", 15 | "vitepress": "^1.6.3" 16 | }, 17 | "devDependencies": { 18 | "@vuepress/plugin-shiki": "^2.0.0-rc.83", 19 | "markdown-it": "^14.1.0" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tests/property_tests.proptest-regressions: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc 4389d9e5a829d147cb11cb4b5182e75a0741f36823f0ade724d5f19102e93dcf # shrinks to query = "'a?" 8 | cc 8553eb31be6672c0c6fd031bee95f520d4bb55fec15e8474d10a3c5b5ba8b4c4 # shrinks to query = "[" 9 | cc 6a5d367e62d691869428703bc771f64a21557cab6f6ae6837e8d2f179d1efe0e # shrinks to docs = [""], query = "" 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /site/public/_headers: -------------------------------------------------------------------------------- 1 | # Global headers for all files 2 | /* 3 | X-Frame-Options: DENY 4 | X-Content-Type-Options: nosniff 5 | Referrer-Policy: strict-origin-when-cross-origin 6 | X-XSS-Protection: 1; mode=block 7 | 8 | # Cache static assets for 1 year 9 | /assets/* 10 | Cache-Control: public, max-age=31536000, immutable 11 | 12 | # Cache images for 1 week 13 | /*.png 14 | Cache-Control: public, max-age=604800 15 | 16 | /*.jpg 17 | Cache-Control: public, max-age=604800 18 | 19 | /*.svg 20 | Cache-Control: public, max-age=604800 21 | 22 | # Cache main page for 1 hour 23 | / 24 | Cache-Control: public, max-age=3600 25 | 26 | # Cache other HTML files for 1 hour 27 | /*.html 28 | Cache-Control: public, max-age=3600 -------------------------------------------------------------------------------- /examples/reranker/src/bin/benchmark.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use clap::Parser; 3 | use bert_reranker::benchmark::{BenchmarkArgs, run_benchmark, print_document_stats, collect_source_files}; 4 | 5 | #[tokio::main] 6 | async fn main() -> Result<()> { 7 | let args = BenchmarkArgs::parse(); 8 | 9 | println!("🚀 BERT Reranker Performance Benchmark"); 10 | println!("======================================"); 11 | 12 | // Collect documents first to show 
stats 13 | let documents = collect_source_files(&args)?; 14 | print_document_stats(&documents); 15 | 16 | // Run the benchmark 17 | let result = run_benchmark(args).await?; 18 | 19 | // Print results 20 | result.print_summary(); 21 | 22 | Ok(()) 23 | } -------------------------------------------------------------------------------- /result1.txt: -------------------------------------------------------------------------------- 1 | Pattern: yaml workflow agent multi-agent user input 2 | Path: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties 3 | Options: Reranker: bm25, No block merging 4 | Using BM25 ranking (Okapi BM25 algorithm) 5 | Search completed in 34.73ms 6 | 7 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 8 | Lines: 1-4 9 | ```cs 10 | 11 | ``` 12 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 13 | Lines: 1-1 14 | ```cs 15 | // Copyright (c) Microsoft. All rights reserved. 
16 | ``` 17 | Found 2 search results 18 | Total bytes returned: 51 19 | Total tokens returned: 14 20 | -------------------------------------------------------------------------------- /examples/reranker/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bert-reranker" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [[bin]] 7 | name = "reranker" 8 | path = "src/main.rs" 9 | 10 | [[bin]] 11 | name = "demo" 12 | path = "src/demo.rs" 13 | 14 | [[bin]] 15 | name = "benchmark" 16 | path = "src/bin/benchmark.rs" 17 | 18 | [dependencies] 19 | candle-core = "0.8" 20 | candle-nn = "0.8" 21 | candle-transformers = "0.8" 22 | tokenizers = "0.20" 23 | anyhow = "1.0" 24 | clap = { version = "4.0", features = ["derive"] } 25 | serde = { version = "1.0", features = ["derive"] } 26 | serde_json = "1.0" 27 | hf-hub = { version = "0.3", features = ["tokio"] } 28 | safetensors = "0.4" 29 | tokio = { version = "1.0", features = ["full"] } 30 | walkdir = "2.4" 31 | rayon = "1.8" 32 | parking_lot = "0.12" 33 | tempfile = "3.8" -------------------------------------------------------------------------------- /examples/chat/npm/LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2024 Leonid Bugaev 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /src/search/search_options.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | /// Options for performing a search 4 | pub struct SearchOptions<'a> { 5 | pub path: &'a Path, 6 | pub queries: &'a [String], 7 | pub files_only: bool, 8 | pub custom_ignores: &'a [String], 9 | pub exclude_filenames: bool, 10 | pub reranker: &'a str, 11 | #[allow(dead_code)] 12 | pub frequency_search: bool, 13 | pub exact: bool, 14 | pub language: Option<&'a str>, 15 | pub max_results: Option, 16 | pub max_bytes: Option, 17 | pub max_tokens: Option, 18 | pub allow_tests: bool, 19 | pub no_merge: bool, 20 | pub merge_threshold: Option, 21 | pub dry_run: bool, 22 | pub session: Option<&'a str>, 23 | pub timeout: u64, 24 | pub question: Option<&'a str>, 25 | pub no_gitignore: bool, 26 | } 27 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git files 2 | .git/ 3 | .github/ 4 | .gitignore 5 | .githooks/ 6 | 7 | # Build artifacts 8 | target/ 9 | Cargo.lock 10 | 11 | # Documentation 12 | *.md 13 | !README.md 14 | !LICENSE 15 | !ABOUT.MD 16 | docs/ 17 | site/ 18 | 19 | # IDE and editor files 20 | .vscode/ 21 | .idea/ 22 | *.swp 23 | *.swo 24 | *~ 25 | .DS_Store 26 | 27 | # Test files 28 | tests/ 29 | test_cases/ 30 | *.test.* 31 | 32 | # Scripts and CI 33 | scripts/ 34 | .claude/ 35 | 36 | # Node.js artifacts (for examples/chat) 37 | node_modules/ 38 | npm-debug.log* 39 | 
yarn-debug.log* 40 | yarn-error.log* 41 | .npm/ 42 | .yarn/ 43 | 44 | # Examples (but keep benchmarks - needed for Cargo.toml validation) 45 | examples/ 46 | !examples/chat/ 47 | 48 | # MCP directories 49 | mcp-agent/ 50 | 51 | # NPM package directory 52 | npm/ 53 | 54 | # Temporary files 55 | *.tmp 56 | *.log 57 | *.cache -------------------------------------------------------------------------------- /tests/common.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::process::Command; 3 | 4 | pub struct TestContext; 5 | 6 | impl Default for TestContext { 7 | fn default() -> Self { 8 | Self::new() 9 | } 10 | } 11 | 12 | impl TestContext { 13 | pub fn new() -> Self { 14 | TestContext 15 | } 16 | 17 | pub fn run_probe(&self, args: &[&str]) -> Result { 18 | let output = Command::new("cargo") 19 | .args(["run", "--"]) 20 | .args(args) 21 | .output()?; 22 | 23 | if !output.status.success() { 24 | anyhow::bail!( 25 | "Command failed with status {}: {}", 26 | output.status, 27 | String::from_utf8_lossy(&output.stderr) 28 | ); 29 | } 30 | 31 | Ok(String::from_utf8_lossy(&output.stdout).to_string()) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/mocks/test_object.js: -------------------------------------------------------------------------------- 1 | // Example JavaScript object with properties 2 | 3 | const user = { 4 | id: 1, 5 | name: "John Smith", 6 | email: "john.smith@example.com", 7 | profile: { 8 | age: 30, 9 | occupation: "Software Engineer", 10 | skills: ["JavaScript", "TypeScript", "React", "Node.js"] 11 | }, 12 | isActive: true, 13 | lastLogin: new Date("2023-01-01") 14 | }; 15 | 16 | // Function to display user information 17 | function displayUserInfo(user) { 18 | console.log(`User: ${user.name} (ID: ${user.id})`); 19 | console.log(`Email: ${user.email}`); 20 | console.log(`Occupation: ${user.profile.occupation}`); 21 | 
console.log(`Skills: ${user.profile.skills.join(", ")}`); 22 | console.log(`Active: ${user.isActive ? "Yes" : "No"}`); 23 | console.log(`Last Login: ${user.lastLogin.toLocaleDateString()}`); 24 | } 25 | 26 | // Call the function 27 | displayUserInfo(user); 28 | -------------------------------------------------------------------------------- /site/.vitepress/components/TestComponent.vue: -------------------------------------------------------------------------------- 1 | 17 | 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /tests/test_file.rs: -------------------------------------------------------------------------------- 1 | // This is a test file for probe 2 | 3 | #[allow(dead_code)] 4 | fn function1() { 5 | println!("This is function 1"); 6 | // Some code here 7 | let x = 10; 8 | let y = 20; 9 | println!("Sum: {}", x + y); 10 | } 11 | 12 | // This comment is between function1 and function2 13 | // Only a few lines of separation 14 | 15 | #[allow(dead_code)] 16 | fn function2() { 17 | println!("This is function 2"); 18 | // Some code here 19 | let a = 30; 20 | let b = 40; 21 | println!("Product: {}", a * b); 22 | } 23 | 24 | // This is a larger gap between functions 25 | 26 | // More comments 27 | // More comments 28 | // More comments 29 | // More comments 30 | // More comments 31 | // More comments 32 | // More comments 33 | // More comments 34 | // More comments 35 | // More comments 36 | // More comments 37 | 38 | #[allow(dead_code)] 39 | fn function3() { 40 | println!("This is function 3"); 41 | // Some code here 42 | let c = 50; 43 | let d = 60; 44 | println!("Difference: {}", d - c); 45 | } 46 | -------------------------------------------------------------------------------- /examples/chat/npm/.npmignore: -------------------------------------------------------------------------------- 1 | # Development files 2 | .git 3 | .github 4 | .gitignore 5 | .vscode 6 | .idea 7 | .DS_Store 8 | 9 | # Test files 10 | test 11 | tests 12 | __tests__ 13 | *.test.js 14 | *.spec.js 15 | 16 | # Documentation 17 | docs 18 | doc 19 | *.md 20 | !README.md 21 | 22 | # Build files 23 | .travis.yml 24 | .gitlab-ci.yml 25 | .github 26 | .circleci 27 | 28 | # Logs 29 | logs 30 | *.log 31 | npm-debug.log* 32 | yarn-debug.log* 33 | yarn-error.log* 34 | 35 | # Runtime data 36 | pids 37 | *.pid 38 | *.seed 39 | *.pid.lock 40 | 41 | # Coverage directory used by tools like istanbul 42 | coverage 43 | .nyc_output 44 | 45 | 
# Dependency directories 46 | node_modules 47 | 48 | # Optional npm cache directory 49 | .npm 50 | 51 | # Optional eslint cache 52 | .eslintcache 53 | 54 | # Optional REPL history 55 | .node_repl_history 56 | 57 | # Output of 'npm pack' 58 | *.tgz 59 | 60 | # dotenv environment variable files 61 | .env 62 | .env.local 63 | .env.development.local 64 | .env.test.local 65 | .env.production.local 66 | 67 | # Temporary files 68 | tmp 69 | temp -------------------------------------------------------------------------------- /npm/tests/unit/system-prompt.test.js: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from '@jest/globals'; 2 | import { ProbeAgent } from '../../src/agent/ProbeAgent.js'; 3 | 4 | describe('ProbeAgent systemPrompt alias', () => { 5 | test('uses systemPrompt when provided', () => { 6 | const agent = new ProbeAgent({ 7 | path: process.cwd(), 8 | systemPrompt: 'system-level prompt' 9 | }); 10 | 11 | expect(agent.customPrompt).toBe('system-level prompt'); 12 | }); 13 | 14 | test('systemPrompt takes precedence over customPrompt', () => { 15 | const agent = new ProbeAgent({ 16 | path: process.cwd(), 17 | systemPrompt: 'primary system prompt', 18 | customPrompt: 'secondary custom prompt' 19 | }); 20 | 21 | expect(agent.customPrompt).toBe('primary system prompt'); 22 | }); 23 | 24 | test('falls back to customPrompt when systemPrompt is absent', () => { 25 | const agent = new ProbeAgent({ 26 | path: process.cwd(), 27 | customPrompt: 'custom prompt only' 28 | }); 29 | 30 | expect(agent.customPrompt).toBe('custom prompt only'); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /result2.txt: -------------------------------------------------------------------------------- 1 | Pattern: yaml workflow agent multi-agent user input 2 | Path: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties 3 | 
Options: Reranker: bm25, No block merging 4 | Using BM25 ranking (Okapi BM25 algorithm) 5 | Search completed in 30.19ms 6 | 7 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 8 | Lines: 1-1 9 | ```cs 10 | // Copyright (c) Microsoft. All rights reserved. 11 | ``` 12 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 13 | Lines: 3-3 14 | ```cs 15 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] 16 | ``` 17 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 18 | Lines: 1-3 19 | ```cs 20 | // Copyright (c) Microsoft. All rights reserved. 21 | 22 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] 23 | ``` 24 | Found 3 search results 25 | Total bytes returned: 228 26 | Total tokens returned: 64 27 | -------------------------------------------------------------------------------- /examples/chat/test-simple-tracing.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Simple test to check if tracing works 4 | async function testSimpleTracing() { 5 | console.log('Testing simple tracing...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with debug enabled 9 | const probeChat = new ProbeChat({ 10 | debug: true 11 | }); 12 | 13 | // Test just the extractImageUrls function directly 14 | const message = 'Here is an image: https://github.com/user-attachments/assets/example.png'; 15 | 16 | console.log('🔍 Testing extractImageUrls function...'); 17 | 18 | // Import the function to test it directly 19 | const { extractImageUrls } = await import('./probeChat.js'); 20 | 21 | // This should create a span 22 | const result = await extractImageUrls(message, true); 23 | 24 | console.log('✅ extractImageUrls result:', result); 25 | console.log('🎉 Test 
completed!'); 26 | 27 | } catch (error) { 28 | console.error('❌ Test failed:', error.message); 29 | } 30 | } 31 | 32 | testSimpleTracing().catch(console.error); -------------------------------------------------------------------------------- /npm/scripts/build-mcp.cjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const fs = require('fs-extra'); 4 | const path = require('path'); 5 | const { execSync } = require('child_process'); 6 | 7 | async function buildMcp() { 8 | try { 9 | console.log('Building MCP TypeScript...'); 10 | 11 | // Ensure build directory exists 12 | await fs.ensureDir('build'); 13 | 14 | // Copy src files to build directory 15 | console.log('Copying source files...'); 16 | await fs.copy('src', 'build', { 17 | overwrite: true, 18 | errorOnExist: false 19 | }); 20 | 21 | // Run TypeScript compiler 22 | console.log('Compiling TypeScript...'); 23 | execSync('tsc src/mcp/index.ts --outDir build/mcp --module esnext --target es2020 --moduleResolution node --esModuleInterop --allowSyntheticDefaultImports --skipLibCheck', { 24 | stdio: 'inherit', 25 | cwd: process.cwd() 26 | }); 27 | 28 | console.log('✅ MCP build completed successfully'); 29 | 30 | } catch (error) { 31 | console.error('❌ MCP build failed:', error.message); 32 | process.exit(1); 33 | } 34 | } 35 | 36 | buildMcp(); -------------------------------------------------------------------------------- /.github/workflows/visor.yml: -------------------------------------------------------------------------------- 1 | name: Visor Code Review 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | issues: 7 | types: [opened] 8 | issue_comment: 9 | types: [created] 10 | 11 | permissions: 12 | contents: read 13 | pull-requests: write 14 | issues: write 15 | checks: write 16 | 17 | jobs: 18 | code-review: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout code 22 | uses: actions/checkout@v4 23 | - uses: 
probelabs/visor@main 24 | with: 25 | app-id: ${{ secrets.APP_ID }} 26 | private-key: ${{ secrets.APP_PRIVATE_KEY }} 27 | installation-id: ${{ secrets.APP_INSTALLATION_ID }} 28 | debug: 'true' 29 | env: 30 | # AI Provider API Keys (configure one of these in your repository secrets) 31 | # GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} 32 | ANTHROPIC_API_KEY: ${{ secrets.GLM_API_KEY }} 33 | ANTHROPIC_API_URL: 'https://api.z.ai/api/anthropic/v1' 34 | # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 35 | # Optional: Specify the AI model to use 36 | MODEL_NAME: 'glm-4.6' 37 | -------------------------------------------------------------------------------- /npm/jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('jest').Config} */ 2 | export default { 3 | transform: {}, 4 | 5 | // Test environment 6 | testEnvironment: 'node', 7 | 8 | // Test file patterns - run stable test files 9 | testMatch: [ 10 | '**/tests/**/*.test.js', 11 | '**/src/agent/acp/tools.test.js', 12 | '**/src/agent/acp/connection.test.js', 13 | '**/src/agent/acp/types.test.js' 14 | ], 15 | 16 | // Coverage configuration 17 | collectCoverageFrom: [ 18 | 'src/**/*.js', 19 | '!src/**/*.test.js', 20 | '!src/test-*.js', 21 | '!**/node_modules/**', 22 | '!**/build/**', 23 | '!**/dist/**' 24 | ], 25 | 26 | // Coverage thresholds 27 | coverageThreshold: { 28 | global: { 29 | branches: 70, 30 | functions: 70, 31 | lines: 70, 32 | statements: 70 33 | } 34 | }, 35 | 36 | // Coverage reporters 37 | coverageReporters: [ 38 | 'text', 39 | 'lcov', 40 | 'html' 41 | ], 42 | 43 | // Setup files 44 | setupFilesAfterEnv: ['/tests/setup.js'], 45 | 46 | 47 | // Verbose output 48 | verbose: true, 49 | 50 | // Timeout for tests 51 | testTimeout: 10000 52 | }; -------------------------------------------------------------------------------- /npm/tests/unit/backtickAutoFix.test.js: -------------------------------------------------------------------------------- 1 | 
import { validateAndFixMermaidResponse } from '../../src/agent/schemaUtils.js'; 2 | 3 | describe('Mermaid Auto-Fix - Backticks', () => { 4 | const mockOptions = { 5 | debug: false, 6 | path: '/test/path', 7 | provider: 'anthropic', 8 | model: 'claude-3-sonnet-20240229' 9 | }; 10 | 11 | describe('Auto-fix backticks in node labels', () => { 12 | 13 | 14 | 15 | 16 | 17 | test('should remove backticks from quoted labels', async () => { 18 | const response = `\`\`\`mermaid 19 | flowchart TD 20 | A["Already quoted \`backticks\`"] --> B{"Also quoted \`here\`"} 21 | \`\`\``; 22 | 23 | const result = await validateAndFixMermaidResponse(response, mockOptions); 24 | 25 | // @probelabs/maid v0.0.15+ treats backticks inside quoted labels as errors (FL-LABEL-BACKTICK) 26 | // and removes them during auto-fix. This is the expected behavior. 27 | expect(result.fixedResponse).toContain('A["Already quoted backticks"]'); 28 | expect(result.fixedResponse).toContain('B{"Also quoted here"}'); 29 | }); 30 | 31 | }); 32 | 33 | describe('Validation detects backticks correctly', () => { 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/BlogPostLayout.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 37 | 38 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/FeatureSection.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | -------------------------------------------------------------------------------- /src/version.rs: -------------------------------------------------------------------------------- 1 | //! Version utilities for probe 2 | //! 3 | //! This module provides utilities for getting version information at runtime. 
4 | 5 | /// Get the version string from Cargo.toml 6 | pub fn get_version() -> &'static str { 7 | env!("CARGO_PKG_VERSION") 8 | } 9 | 10 | /// Get the package name from Cargo.toml 11 | pub fn get_package_name() -> &'static str { 12 | env!("CARGO_PKG_NAME") 13 | } 14 | 15 | /// Get a formatted version string with package name 16 | pub fn get_version_info() -> String { 17 | format!("{} {}", get_package_name(), get_version()) 18 | } 19 | 20 | #[cfg(test)] 21 | mod tests { 22 | use super::*; 23 | 24 | #[test] 25 | fn test_get_version() { 26 | let version = get_version(); 27 | assert!(!version.is_empty()); 28 | // Should follow semantic versioning pattern 29 | assert!(version.contains('.')); 30 | } 31 | 32 | #[test] 33 | fn test_get_package_name() { 34 | let name = get_package_name(); 35 | assert_eq!(name, "probe-code"); 36 | } 37 | 38 | #[test] 39 | fn test_get_version_info() { 40 | let info = get_version_info(); 41 | assert!(info.contains("probe-code")); 42 | assert!(info.contains('.')); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /tests/test_tokenize.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Import the tokenize function from our probe crate 3 | use probe_code::ranking::tokenize; 4 | 5 | // Test strings 6 | let test_strings = ["The quick brown fox jumps over the lazy dog", 7 | "function calculateTotal(items) { return items.reduce((sum, item) => sum + item.price, 0); }", 8 | "class UserController extends BaseController implements UserInterface", 9 | "Searching for files containing important information", 10 | "Fruitlessly searching for the missing variable in the codebase"]; 11 | 12 | println!("Testing tokenization with stop word removal and stemming:\n"); 13 | 14 | for (i, test_str) in test_strings.iter().enumerate() { 15 | println!("Original text {}:\n{}", i + 1, test_str); 16 | 17 | // Tokenize with stop word removal and stemming 18 | let tokens 
= tokenize(test_str); 19 | 20 | println!("Tokens after stop word removal and stemming:"); 21 | println!("{tokens:?}"); 22 | println!("Number of tokens: {}\n", tokens.len()); 23 | } 24 | 25 | // Specific test for stemming 26 | println!("Specific stemming test:"); 27 | println!("'fruitlessly' stems to: {}", tokenize("fruitlessly")[0]); 28 | } 29 | -------------------------------------------------------------------------------- /tests/mocks/test_struct.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | // Person represents a person with various attributes 6 | type Person struct { 7 | Name string 8 | Age int 9 | Email string 10 | PhoneNumber string 11 | Address Address 12 | } 13 | 14 | // Address represents a physical address 15 | type Address struct { 16 | Street string 17 | City string 18 | State string 19 | ZipCode string 20 | Country string 21 | } 22 | 23 | func main() { 24 | // Create a new person 25 | person := Person{ 26 | Name: "John Doe", 27 | Age: 30, 28 | Email: "john.doe@example.com", 29 | PhoneNumber: "555-1234", 30 | Address: Address{ 31 | Street: "123 Main St", 32 | City: "Anytown", 33 | State: "CA", 34 | ZipCode: "12345", 35 | Country: "USA", 36 | }, 37 | } 38 | 39 | // Print the person's information 40 | fmt.Printf("Name: %s\n", person.Name) 41 | fmt.Printf("Age: %d\n", person.Age) 42 | fmt.Printf("Email: %s\n", person.Email) 43 | fmt.Printf("Phone: %s\n", person.PhoneNumber) 44 | fmt.Printf("Address: %s, %s, %s %s, %s\n", 45 | person.Address.Street, 46 | person.Address.City, 47 | person.Address.State, 48 | person.Address.ZipCode, 49 | person.Address.Country) 50 | } 51 | -------------------------------------------------------------------------------- /npm/tests/nestedQuoteFix.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Nested Quote Fix Tests 3 | * 4 | * NOTE: Most tests in this file have been skipped for maid 0.0.6 
integration. 5 | * These tests check OLD regex-based HTML entity handling and quote fixing behavior: 6 | * - Converting ' to ' 7 | * - Automatic quote wrapping with escaped inner quotes 8 | * - Specific HTML entity normalization 9 | * 10 | * Maid handles HTML entities and quotes differently using proper parsing. 11 | * Tests marked with .skip check OLD behavior that maid doesn't replicate. 12 | */ 13 | 14 | import { validateAndFixMermaidResponse } from '../src/agent/schemaUtils.js'; 15 | 16 | 17 | 18 | test('should not double-encode already encoded entities', async () => { 19 | const preEncodedDiagram = ` 20 | \`\`\`mermaid 21 | graph TD 22 | A[Text with 'single' quotes] 23 | B[Text with "double" quotes] 24 | \`\`\` 25 | `; 26 | 27 | const result = await validateAndFixMermaidResponse(preEncodedDiagram, { 28 | autoFix: true, 29 | debug: false 30 | }); 31 | 32 | expect(result.isValid).toBe(true); 33 | 34 | // Should not double-encode 35 | expect(result.fixedResponse).not.toContain('&#39;'); 36 | expect(result.fixedResponse).not.toContain('&quot;'); 37 | }); 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/language/mod.rs: -------------------------------------------------------------------------------- 1 | // Language module - provides functionality for parsing different programming languages 2 | // using tree-sitter and extracting code blocks. 
3 | 4 | // Import submodules 5 | pub mod block_handling; 6 | pub mod common; 7 | pub mod factory; 8 | pub mod language_trait; 9 | pub mod parser; 10 | pub mod parser_pool; 11 | pub mod test_detection; 12 | pub mod tree_cache; 13 | 14 | // Language implementations 15 | pub mod c; 16 | pub mod cpp; 17 | pub mod csharp; 18 | pub mod go; 19 | pub mod html; 20 | pub mod java; 21 | pub mod javascript; 22 | pub mod markdown; 23 | pub mod php; 24 | pub mod python; 25 | pub mod ruby; 26 | pub mod rust; 27 | pub mod swift; 28 | pub mod typescript; 29 | pub mod yaml; 30 | 31 | // Re-export items for backward compatibility 32 | pub use parser::{parse_file_for_code_blocks, parse_file_for_code_blocks_with_tree}; 33 | pub use parser_pool::{clear_parser_pool, get_pool_stats, get_pooled_parser, return_pooled_parser}; 34 | pub use test_detection::is_test_file; 35 | #[allow(unused_imports)] 36 | pub use tree_cache::{ 37 | clear_tree_cache, get_cache_size, get_or_parse_tree_pooled, invalidate_cache_entry, 38 | }; 39 | 40 | #[cfg(test)] 41 | mod tests; 42 | 43 | #[cfg(test)] 44 | mod javascript_specific_tests; 45 | 46 | #[cfg(test)] 47 | mod typescript_specific_tests; 48 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/CommandExample.vue: -------------------------------------------------------------------------------- 1 | 12 | 13 | 21 | 22 | -------------------------------------------------------------------------------- /.claude/commands/performance-review.md: -------------------------------------------------------------------------------- 1 | You goal is to measure and improve performance. 
2 | 3 | First run `cargo build --release` and remember the current performance: DEBUG=1 ./target/release/probe search "yaml workflow agent multi-agent user input" ~/go/src/semantic-kernel/ --max-tokens 10000 2>/dev/null | sed -n '/=== SEARCH TIMING INFORMATION ===/,/====================================/p' 4 | 5 | Print it to the user. 6 | 7 | Now that you have a baseline, find all the steps which take more than 1 second, and run the separate @architecture-agent for each, to plan if we can significantly improve performance. For each suggestion measure confidence. If confidence is high, add it to the detailed plan; if not, say that it is already performant enough. 8 | 9 | Once you went through all the steps and built a solid plan, I want you to start implementing it in a separate agent. 10 | But always explicitly ask the user before each next implementation. 11 | 12 | Each change should be measured, and compared with our baseline. You can add more debugging to search timing information, or make it more detailed if needed. 13 | Once each change is implemented, it should be committed as a separate commit. 14 | 15 | We do care about backward compatibility, and about deterministic outputs as well. Be careful. Validate each change by re-running all the tests. 16 | -------------------------------------------------------------------------------- /src/search/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod file_processing; 2 | pub mod query; 3 | mod result_ranking; 4 | // Replace the old search_execution with new modules 5 | pub mod block_merging; 6 | pub mod cache; // New module for caching search results 7 | pub mod early_ranker; // New module for early BM25 ranking 8 | pub mod elastic_query; 9 | pub mod file_list_cache; // New module for caching file lists 10 | pub mod filters; // New module for search filters (file:, ext:, type:, etc.)
11 | pub mod ripgrep_searcher; 12 | mod search_limiter; 13 | mod search_options; 14 | pub mod search_output; 15 | pub mod search_runner; 16 | pub mod search_tokens; 17 | pub mod simd_pattern_matching; 18 | pub mod simd_tokenization; // SIMD-accelerated tokenization 19 | pub mod term_exceptions; // New module for term exceptions 20 | pub mod timeout; // New module for timeout functionality 21 | pub mod tokenization; // New elastic search query parser 22 | // Temporarily commented out due to compilation issues 23 | // mod temp_frequency_search; 24 | 25 | #[cfg(test)] 26 | mod file_processing_tests; 27 | 28 | #[cfg(test)] 29 | mod test_token_limiter_failures; 30 | 31 | // Public exports 32 | pub use search_options::SearchOptions; 33 | pub use search_output::format_and_print_search_results; 34 | pub use search_runner::perform_probe; 35 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/FeatureList.vue: -------------------------------------------------------------------------------- 1 | 14 | 15 | 22 | 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use distroless for minimal attack surface and smaller image 2 | FROM gcr.io/distroless/cc-debian12 3 | 4 | # Build arguments for metadata 5 | ARG VERSION=dev 6 | ARG BUILD_DATE 7 | ARG VCS_REF 8 | ARG TARGETARCH 9 | 10 | # Add security and metadata labels 11 | LABEL maintainer="Probe Team" \ 12 | description="Probe - Code search tool" \ 13 | version="${VERSION}" \ 14 | org.opencontainers.image.created="${BUILD_DATE}" \ 15 | org.opencontainers.image.source="https://github.com/probelabs/probe" \ 16 | org.opencontainers.image.revision="${VCS_REF}" \ 17 | org.opencontainers.image.version="${VERSION}" \ 18 | org.opencontainers.image.title="Probe" \ 19 | org.opencontainers.image.description="AI-friendly code search tool built in 
Rust" 20 | 21 | # Distroless images run as non-root by default and include CA certificates 22 | 23 | # Copy the pre-built binary based on target architecture 24 | # TARGETARCH is automatically provided by Docker buildx (amd64, arm64) 25 | COPY binaries/${TARGETARCH}/probe /usr/local/bin/probe 26 | 27 | # Health check using the binary (distroless runs as non-root by default) 28 | HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ 29 | CMD ["/usr/local/bin/probe", "--version"] 30 | 31 | # Set the default command 32 | ENTRYPOINT ["/usr/local/bin/probe"] -------------------------------------------------------------------------------- /examples/chat/test-image-spans.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Test image extraction with OpenTelemetry spans 4 | async function testImageExtraction() { 5 | console.log('Testing image extraction with OpenTelemetry spans...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with no API keys mode 9 | const probeChat = new ProbeChat({ 10 | debug: true, 11 | noApiKeysMode: true 12 | }); 13 | 14 | // Test message with images 15 | const testMessage = ` 16 | Here are some images: 17 | - GitHub asset: https://github.com/user-attachments/assets/example.png 18 | - Private image: https://private-user-images.githubusercontent.com/123/example.jpg 19 | - Regular image: https://example.com/photo.jpeg 20 | 21 | And some text without images. 22 | `; 23 | 24 | console.log('🔍 Testing chat with images (no API keys mode)...'); 25 | const result = await probeChat.chat(testMessage); 26 | console.log('✅ Chat completed successfully'); 27 | console.log('📄 Response:', result.response.substring(0, 100) + '...'); 28 | 29 | // Test completed 30 | console.log('\n🎉 Test completed! 
Check test-image-spans.jsonl for trace data.'); 31 | 32 | } catch (error) { 33 | console.error('❌ Test failed:', error.message); 34 | } 35 | } 36 | 37 | testImageExtraction().catch(console.error); -------------------------------------------------------------------------------- /npm/test-grep.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { grep } from './src/index.js'; 4 | 5 | async function testGrep() { 6 | console.log('Testing grep functionality...\n'); 7 | 8 | try { 9 | // Test 1: Basic search 10 | console.log('Test 1: Basic search for "TODO" in src directory'); 11 | const result1 = await grep({ 12 | pattern: 'TODO', 13 | paths: './src', 14 | lineNumbers: true 15 | }); 16 | console.log('Result:'); 17 | console.log(result1); 18 | console.log('\n---\n'); 19 | 20 | // Test 2: Case-insensitive search with count 21 | console.log('Test 2: Count "function" occurrences (case-insensitive)'); 22 | const result2 = await grep({ 23 | pattern: 'function', 24 | paths: './src', 25 | ignoreCase: true, 26 | count: true 27 | }); 28 | console.log('Result:'); 29 | console.log(result2); 30 | console.log('\n---\n'); 31 | 32 | // Test 3: Files with matches 33 | console.log('Test 3: Files containing "export"'); 34 | const result3 = await grep({ 35 | pattern: 'export', 36 | paths: './src', 37 | filesWithMatches: true 38 | }); 39 | console.log('Result:'); 40 | console.log(result3); 41 | console.log('\n---\n'); 42 | 43 | console.log('✅ All grep tests passed!'); 44 | } catch (error) { 45 | console.error('❌ Test failed:', error.message); 46 | console.error(error); 47 | process.exit(1); 48 | } 49 | } 50 | 51 | testGrep(); 52 | -------------------------------------------------------------------------------- /examples/chat/npm/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@probelabs/probe-chat", 3 | "version": "1.0.0", 4 | "description": "CLI chat 
interface for Probe code search", 5 | "main": "index.js", 6 | "type": "module", 7 | "bin": { 8 | "probe-chat": "./bin/probe-chat.js" 9 | }, 10 | "scripts": { 11 | "test": "echo \"Error: no test specified\" && exit 1", 12 | "prepublishOnly": "chmod +x ./bin/probe-chat.js" 13 | }, 14 | "keywords": [ 15 | "probe", 16 | "code-search", 17 | "chat", 18 | "ai", 19 | "cli" 20 | ], 21 | "author": "Leonid Bugaev", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@ai-sdk/anthropic": "^0.0.9", 25 | "@ai-sdk/openai": "^0.0.9", 26 | "@probelabs/probe": "*", 27 | "ai": "^4.1.41", 28 | "chalk": "^5.3.0", 29 | "commander": "^11.1.0", 30 | "dotenv": "^16.3.1", 31 | "inquirer": "^9.2.12", 32 | "ora": "^7.0.1" 33 | }, 34 | "engines": { 35 | "node": ">=18.0.0" 36 | }, 37 | "repository": { 38 | "type": "git", 39 | "url": "git+https://github.com/probelabs/probe.git" 40 | }, 41 | "bugs": { 42 | "url": "https://github.com/probelabs/probe/issues" 43 | }, 44 | "homepage": "https://github.com/probelabs/probe#readme", 45 | "publishConfig": { 46 | "access": "public" 47 | }, 48 | "files": [ 49 | "bin/", 50 | "index.js", 51 | "README.md", 52 | "LICENSE" 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /src/language/language_trait.rs: -------------------------------------------------------------------------------- 1 | use tree_sitter::{Language as TSLanguage, Node}; 2 | 3 | /// Trait that defines the interface for all language implementations. 
4 | pub trait LanguageImpl { 5 | /// Get the tree-sitter language for parsing 6 | fn get_tree_sitter_language(&self) -> TSLanguage; 7 | 8 | /// Check if a node is an acceptable container/parent entity 9 | fn is_acceptable_parent(&self, node: &Node) -> bool; 10 | 11 | /// Check if a node represents a test 12 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool; 13 | 14 | /// Get the file extension for this language 15 | #[deprecated(since = "0.1.0", note = "this method is not used")] 16 | #[allow(dead_code)] 17 | fn get_extension(&self) -> &'static str; 18 | 19 | /// Find the parent function or method declaration for a node (if any) 20 | fn find_parent_function<'a>(&self, _node: Node<'a>) -> Option> { 21 | // Default implementation returns None 22 | None 23 | } 24 | 25 | /// Extract the symbol signature without implementation body 26 | /// Returns a clean signature for functions, structs, classes, methods, constants, etc. 27 | fn get_symbol_signature(&self, _node: &Node, _source: &[u8]) -> Option { 28 | // Default implementation returns None 29 | // Each language should implement this to extract clean signatures 30 | None 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /examples/chat/test-chat-tracing.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Test chat function tracing 4 | async function testChatTracing() { 5 | console.log('Testing chat tracing...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with debug enabled 9 | const probeChat = new ProbeChat({ 10 | debug: true, 11 | noApiKeysMode: true 12 | }); 13 | 14 | // Test message with images 15 | const testMessage = 'Here is an image: https://github.com/user-attachments/assets/example.png and some text.'; 16 | 17 | console.log('🔍 Testing chat function with tracing...'); 18 | console.log('Message:', testMessage); 19 | 20 | // Call the chat function - this 
should create spans 21 | const result = await probeChat.chat(testMessage); 22 | 23 | console.log('✅ Chat completed successfully'); 24 | console.log('📄 Response length:', result.response.length); 25 | console.log('📄 Response preview:', result.response.substring(0, 100) + '...'); 26 | 27 | console.log('🎉 Test completed! Check simple-traces.jsonl for trace data.'); 28 | 29 | // Wait a bit for telemetry to flush 30 | console.log('⏳ Waiting for telemetry to flush...'); 31 | await new Promise(resolve => setTimeout(resolve, 2000)); 32 | 33 | } catch (error) { 34 | console.error('❌ Test failed:', error.message); 35 | } 36 | } 37 | 38 | testChatTracing().catch(console.error); -------------------------------------------------------------------------------- /npm/tests/setup.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Jest setup file 3 | * This file runs before all tests to set up the testing environment 4 | */ 5 | import { jest, beforeEach, afterEach } from '@jest/globals'; 6 | import fs from 'fs'; 7 | import path from 'path'; 8 | 9 | // Set environment to test 10 | process.env.NODE_ENV = 'test'; 11 | 12 | // Disable debug logging during tests unless explicitly enabled 13 | if (!process.env.TEST_DEBUG) { 14 | process.env.DEBUG = ''; 15 | } 16 | 17 | // Prefer local binary in repository to avoid network during tests 18 | try { 19 | const isWin = process.platform === 'win32'; 20 | const binDir = path.resolve(__dirname, '..', 'bin'); 21 | const candidate = path.join(binDir, isWin ? 
'probe.exe' : 'probe-binary'); 22 | if (fs.existsSync(candidate)) { 23 | process.env.PROBE_PATH = candidate; 24 | } 25 | } catch {} 26 | 27 | // Global test timeout (can be overridden per test) 28 | jest.setTimeout(10000); 29 | 30 | // Mock console methods to avoid cluttering test output 31 | const originalConsole = { ...console }; 32 | beforeEach(() => { 33 | if (!process.env.TEST_VERBOSE) { 34 | console.log = jest.fn(); 35 | console.error = jest.fn(); 36 | console.warn = jest.fn(); 37 | } 38 | }); 39 | 40 | afterEach(() => { 41 | if (!process.env.TEST_VERBOSE) { 42 | console.log = originalConsole.log; 43 | console.error = originalConsole.error; 44 | console.warn = originalConsole.warn; 45 | } 46 | }); 47 | -------------------------------------------------------------------------------- /npm/tests/unit/types-probe-agent-options.test.js: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from '@jest/globals'; 2 | import ts from 'typescript'; 3 | 4 | /** 5 | * Regression test: ensure the public TypeScript surface exposes tool filtering 6 | * and system prompt options. We compile a tiny snippet and assert no diagnostics. 
7 | */ 8 | describe('Type definitions: ProbeAgentOptions', () => { 9 | const compile = (source) => { 10 | const result = ts.transpileModule(source, { 11 | compilerOptions: { 12 | target: ts.ScriptTarget.ES2020, 13 | module: ts.ModuleKind.ESNext, 14 | moduleResolution: ts.ModuleResolutionKind.Node16, 15 | strict: true, 16 | skipLibCheck: true, 17 | isolatedModules: true, 18 | allowImportingTsExtensions: true, 19 | types: [], 20 | } 21 | }); 22 | return result.diagnostics || []; 23 | }; 24 | 25 | test('accepts systemPrompt, allowedTools, and disableTools', () => { 26 | const diagnostics = compile(` 27 | import { ProbeAgent, type ProbeAgentOptions } from '../..'; 28 | 29 | const options: ProbeAgentOptions = { 30 | systemPrompt: 'hello', 31 | customPrompt: 'fallback', 32 | allowedTools: ['search', '!bash'], 33 | disableTools: false, 34 | }; 35 | 36 | const agent = new ProbeAgent(options); 37 | void agent; 38 | `); 39 | 40 | expect(diagnostics.length).toBe(0); 41 | }); 42 | }); 43 | -------------------------------------------------------------------------------- /site/.vitepress/theme/index.js: -------------------------------------------------------------------------------- 1 | import { h } from 'vue' 2 | import DefaultTheme from 'vitepress/theme' 3 | import './custom.css' 4 | import './home.css' 5 | import './blog.css' 6 | import FeatureList from './components/FeatureList.vue' 7 | import CodeEditor from './components/CodeEditor.vue' 8 | import CommandExample from './components/CommandExample.vue' 9 | import BlogPostLayout from './components/BlogPostLayout.vue' 10 | import BlogLayout from './layouts/BlogLayout.vue' 11 | import FeatureSection from '../components/FeatureSection.vue' 12 | import SimpleFeatureSection from '../components/SimpleFeatureSection.vue' 13 | import StarsBackground from '../components/StarsBackground.vue' 14 | import HomeFeatures from '../components/HomeFeatures.vue' 15 | 16 | export default { 17 | ...DefaultTheme, 18 | Layout() { 19 | return 
h(DefaultTheme.Layout, null, { 20 | 'home-features-after': () => h(FeatureList) 21 | }); 22 | }, 23 | enhanceApp({ app }) { 24 | // Register global components 25 | app.component('FeatureList', FeatureList) 26 | app.component('CodeEditor', CodeEditor) 27 | app.component('CommandExample', CommandExample) 28 | app.component('BlogPostLayout', BlogPostLayout) 29 | app.component('BlogLayout', BlogLayout) 30 | app.component('FeatureSection', FeatureSection) 31 | app.component('SimpleFeatureSection', SimpleFeatureSection) 32 | app.component('StarsBackground', StarsBackground) 33 | app.component('HomeFeatures', HomeFeatures) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /npm/src/cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * CLI wrapper for the probe binary 5 | * 6 | * This script ensures the probe binary is downloaded and then executes it with the provided arguments. 7 | * It's designed to be as lightweight as possible, essentially just passing through to the actual binary. 
8 | */ 9 | 10 | import { spawn } from 'child_process'; 11 | import { getBinaryPath } from './utils.js'; 12 | 13 | /** 14 | * Main function 15 | */ 16 | async function main() { 17 | try { 18 | // Get the path to the probe binary (this will download it if needed) 19 | const binaryPath = await getBinaryPath(); 20 | 21 | // Get the arguments passed to the CLI 22 | const args = process.argv.slice(2); 23 | 24 | // Spawn the probe binary with the provided arguments 25 | const probeProcess = spawn(binaryPath, args, { 26 | stdio: 'inherit' // Pipe stdin/stdout/stderr to the parent process 27 | }); 28 | 29 | // Handle process exit 30 | probeProcess.on('close', (code) => { 31 | process.exit(code); 32 | }); 33 | 34 | // Handle process errors 35 | probeProcess.on('error', (error) => { 36 | console.error(`Error executing probe binary: ${error.message}`); 37 | process.exit(1); 38 | }); 39 | } catch (error) { 40 | console.error(`Error: ${error.message}`); 41 | process.exit(1); 42 | } 43 | } 44 | 45 | // Execute the main function 46 | main().catch(error => { 47 | console.error(`Unexpected error: ${error.message}`); 48 | process.exit(1); 49 | }); -------------------------------------------------------------------------------- /site/.vitepress/components/FullWidthFeatureSection.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 13 | 14 | -------------------------------------------------------------------------------- /npm/src/agent/shared/Session.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Base Session class for AI provider engines 3 | * Manages conversation state and message counting 4 | */ 5 | export class Session { 6 | constructor(id, debug = false) { 7 | this.id = id; 8 | this.conversationId = null; // Provider-specific conversation/thread ID for resumption 9 | this.messageCount = 0; 10 | this.debug = debug; 11 | } 12 | 13 | /** 14 | * Set the conversation ID for session 
resumption 15 | * @param {string} conversationId - Provider's conversation/thread ID 16 | */ 17 | setConversationId(conversationId) { 18 | this.conversationId = conversationId; 19 | if (this.debug) { 20 | console.log(`[Session ${this.id}] Conversation ID: ${conversationId}`); 21 | } 22 | } 23 | 24 | /** 25 | * Increment the message count 26 | */ 27 | incrementMessageCount() { 28 | this.messageCount++; 29 | } 30 | 31 | /** 32 | * Get session info as plain object 33 | * @returns {Object} Session information 34 | */ 35 | getInfo() { 36 | return { 37 | id: this.id, 38 | conversationId: this.conversationId, 39 | messageCount: this.messageCount 40 | }; 41 | } 42 | 43 | /** 44 | * Get resume arguments for CLI commands (used by Claude Code) 45 | * @returns {Array} CLI arguments for resuming conversation 46 | */ 47 | getResumeArgs() { 48 | if (this.conversationId && this.messageCount > 0) { 49 | return ['--resume', this.conversationId]; 50 | } 51 | return []; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/chat/test-direct-function.js: -------------------------------------------------------------------------------- 1 | // Test direct function call with telemetry 2 | import { TelemetryConfig } from './telemetry.js'; 3 | import { trace } from '@opentelemetry/api'; 4 | 5 | // Initialize telemetry first 6 | const telemetryConfig = new TelemetryConfig({ 7 | enableFile: true, 8 | enableConsole: true, 9 | filePath: './direct-test-traces.jsonl' 10 | }); 11 | 12 | telemetryConfig.initialize(); 13 | 14 | // Test function with tracing 15 | function testFunction() { 16 | const tracer = trace.getTracer('direct-test'); 17 | return tracer.startActiveSpan('testFunction', (span) => { 18 | try { 19 | console.log('🔍 Inside test function with span'); 20 | 21 | span.setAttributes({ 22 | 'test.name': 'direct-function-test', 23 | 'test.timestamp': Date.now() 24 | }); 25 | 26 | const result = 'Test completed successfully'; 27 | 
span.setStatus({ code: 1 }); // SUCCESS 28 | return result; 29 | } catch (error) { 30 | span.recordException(error); 31 | span.setStatus({ code: 2, message: error.message }); 32 | throw error; 33 | } finally { 34 | span.end(); 35 | } 36 | }); 37 | } 38 | 39 | // Test the function 40 | console.log('Testing direct function call with telemetry...'); 41 | const result = testFunction(); 42 | console.log('✅ Result:', result); 43 | 44 | // Wait and shutdown 45 | setTimeout(async () => { 46 | console.log('⏳ Shutting down telemetry...'); 47 | await telemetryConfig.shutdown(); 48 | console.log('🎉 Test completed!'); 49 | }, 2000); -------------------------------------------------------------------------------- /npm/src/agent/storage/InMemoryStorageAdapter.js: -------------------------------------------------------------------------------- 1 | import { StorageAdapter } from './StorageAdapter.js'; 2 | 3 | /** 4 | * Default in-memory storage adapter 5 | * This is the default behavior - stores history in a Map in memory 6 | */ 7 | export class InMemoryStorageAdapter extends StorageAdapter { 8 | constructor() { 9 | super(); 10 | this.sessions = new Map(); // sessionId -> {messages: [], metadata: {}} 11 | } 12 | 13 | async loadHistory(sessionId) { 14 | const session = this.sessions.get(sessionId); 15 | return session ? session.messages : []; 16 | } 17 | 18 | async saveMessage(sessionId, message) { 19 | if (!this.sessions.has(sessionId)) { 20 | this.sessions.set(sessionId, { 21 | messages: [], 22 | metadata: { 23 | createdAt: new Date().toISOString(), 24 | lastActivity: new Date().toISOString() 25 | } 26 | }); 27 | } 28 | 29 | const session = this.sessions.get(sessionId); 30 | session.messages.push(message); 31 | session.metadata.lastActivity = new Date().toISOString(); 32 | } 33 | 34 | async clearHistory(sessionId) { 35 | this.sessions.delete(sessionId); 36 | } 37 | 38 | async getSessionMetadata(sessionId) { 39 | const session = this.sessions.get(sessionId); 40 | return session ? 
session.metadata : null; 41 | } 42 | 43 | async updateSessionActivity(sessionId) { 44 | const session = this.sessions.get(sessionId); 45 | if (session) { 46 | session.metadata.lastActivity = new Date().toISOString(); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /npm/src/agent/mockProvider.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Mock AI provider for testing purposes 3 | * This provider simulates AI responses without making actual API calls 4 | */ 5 | 6 | export function createMockProvider() { 7 | return { 8 | languageModel: (modelName) => ({ 9 | modelId: `mock-${modelName}`, 10 | provider: 'mock', 11 | 12 | // Mock the doGenerate method used by Vercel AI SDK 13 | doGenerate: async ({ messages, tools }) => { 14 | // Simulate processing time 15 | await new Promise(resolve => setTimeout(resolve, 10)); 16 | 17 | // Return a mock response 18 | return { 19 | text: 'This is a mock response for testing', 20 | toolCalls: [], 21 | usage: { 22 | promptTokens: 10, 23 | completionTokens: 5, 24 | totalTokens: 15 25 | } 26 | }; 27 | }, 28 | 29 | // Mock the doStream method for streaming responses 30 | doStream: async function* ({ messages, tools }) { 31 | // Simulate streaming response 32 | yield { 33 | type: 'text-delta', 34 | textDelta: 'Mock streaming response' 35 | }; 36 | 37 | yield { 38 | type: 'finish', 39 | usage: { 40 | promptTokens: 10, 41 | completionTokens: 5, 42 | totalTokens: 15 43 | } 44 | }; 45 | } 46 | }) 47 | }; 48 | } 49 | 50 | export function createMockModel(modelName = 'mock-model') { 51 | const provider = createMockProvider(); 52 | return provider.languageModel(modelName); 53 | } -------------------------------------------------------------------------------- /npm/bin/binaries/README.md: -------------------------------------------------------------------------------- 1 | # Bundled Probe Binaries 2 | 3 | This directory contains pre-compiled 
probe binaries for all supported platforms, bundled with the npm package to enable offline installation. 4 | 5 | ## Expected Files 6 | 7 | The CI/CD pipeline should place the following compressed binaries here before publishing to npm: 8 | 9 | - `probe-v{VERSION}-x86_64-unknown-linux-musl.tar.gz` - Linux x64 (static) 10 | - `probe-v{VERSION}-aarch64-unknown-linux-musl.tar.gz` - Linux ARM64 (static) 11 | - `probe-v{VERSION}-x86_64-apple-darwin.tar.gz` - macOS Intel 12 | - `probe-v{VERSION}-aarch64-apple-darwin.tar.gz` - macOS Apple Silicon 13 | - `probe-v{VERSION}-x86_64-pc-windows-msvc.zip` - Windows x64 14 | 15 | ## File Size 16 | 17 | Each compressed binary is approximately 5MB, totaling ~25MB for all 5 platforms. 18 | 19 | ## Installation Flow 20 | 21 | 1. **Postinstall script** (`scripts/postinstall.js`) detects the current platform 22 | 2. **Extraction** (`src/extractor.js`) extracts the matching bundled binary 23 | 3. **Fallback**: If no bundled binary is found, downloads from GitHub releases 24 | 25 | ## CI Integration 26 | 27 | The release workflow (`.github/workflows/release.yml`) should: 28 | 29 | 1. Build binaries for all 5 platforms 30 | 2. Create compressed archives (`.tar.gz` or `.zip`) 31 | 3. Copy them to `npm/bin/binaries/` before running `npm publish` 32 | 33 | Example CI step: 34 | ```yaml 35 | - name: Copy binaries to npm package 36 | run: | 37 | mkdir -p npm/bin/binaries 38 | cp dist/probe-v$VERSION-*.tar.gz npm/bin/binaries/ 39 | cp dist/probe-v$VERSION-*.zip npm/bin/binaries/ 40 | ``` 41 | -------------------------------------------------------------------------------- /.github/workflows/README-docker.md: -------------------------------------------------------------------------------- 1 | # Docker CI/CD Setup 2 | 3 | This document describes the Docker CI/CD setup for the Probe project. 4 | 5 | ## Required Secrets 6 | 7 | The following secrets need to be configured in your GitHub repository settings: 8 | 9 | 1. 
**`DOCKER_HUB_TOKEN`** - Docker Hub access token for pushing images 10 | - Create at: https://hub.docker.com/settings/security 11 | - Required permissions: Read, Write, Delete 12 | 13 | ## Optional Variables 14 | 15 | The following variables can be configured in repository settings: 16 | 17 | 1. **`DOCKER_HUB_USERNAME`** - Docker Hub username (defaults to 'buger') 18 | 19 | ## Workflow Integration 20 | 21 | ### release.yml 22 | The Docker build and publish process is integrated into the main release workflow: 23 | - Triggers on version tags (v*) 24 | - Builds multi-platform images (linux/amd64, linux/arm64) 25 | - Publishes versioned images to Docker Hub 26 | - Updates Docker Hub descriptions 27 | - Tags: `X.Y.Z` and `latest` 28 | 29 | The `publish-docker-images` job runs after the binary releases are complete, ensuring all release artifacts are available. 30 | 31 | ## Image Naming 32 | 33 | - Probe CLI: `buger/probe` 34 | - Probe Chat: `buger/probe-chat` 35 | 36 | ## Testing Locally 37 | 38 | ```bash 39 | # Test the full release workflow (including Docker builds) 40 | act -j publish-docker-images --secret DOCKER_HUB_TOKEN=your_token -e <(echo '{"ref": "refs/tags/v1.0.0"}') 41 | 42 | # Test Docker builds locally 43 | docker build -t probe-test . 44 | docker build -t probe-chat-test -f examples/chat/Dockerfile examples/chat 45 | 46 | # Test multi-platform builds locally 47 | docker buildx build --platform linux/amd64,linux/arm64 -t probe-test . 48 | ``` -------------------------------------------------------------------------------- /src/search/timeout.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicBool, Ordering}; 2 | use std::sync::Arc; 3 | use std::thread; 4 | use std::time::Duration; 5 | 6 | /// Starts a timeout thread that will terminate the process if the timeout is reached. 7 | /// Returns a handle to the timeout thread that can be used to stop it. 
8 | pub fn start_timeout_thread(timeout_seconds: u64) -> Arc { 9 | let should_stop = Arc::new(AtomicBool::new(false)); 10 | let should_stop_clone = should_stop.clone(); 11 | 12 | // For testing purposes, check if we're running in a test environment 13 | let is_test = std::env::var("RUST_TEST_THREADS").is_ok(); 14 | 15 | // Use a shorter sleep interval for tests to make timeouts more reliable 16 | let sleep_interval = if is_test { 17 | Duration::from_millis(10) // 100ms for tests 18 | } else { 19 | Duration::from_secs(1) // 1 second for normal operation 20 | }; 21 | 22 | thread::spawn(move || { 23 | let mut elapsed_time = Duration::from_secs(0); 24 | let timeout_duration = Duration::from_secs(timeout_seconds); 25 | 26 | while elapsed_time < timeout_duration { 27 | // Check if we should stop the timeout thread 28 | if should_stop_clone.load(Ordering::SeqCst) { 29 | return; 30 | } 31 | 32 | // Sleep for the interval 33 | thread::sleep(sleep_interval); 34 | elapsed_time += sleep_interval; 35 | } 36 | 37 | // Timeout reached, print a message and terminate the process 38 | eprintln!("Search operation timed out after {timeout_seconds} seconds"); 39 | std::process::exit(1); 40 | }); 41 | 42 | should_stop 43 | } 44 | -------------------------------------------------------------------------------- /npm/src/agent/mcp/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * MCP (Model Context Protocol) integration for ProbeAgent 3 | * 4 | * This module provides: 5 | * - MCP client management for connecting to MCP servers 6 | * - XML/JSON hybrid tool interface 7 | * - Configuration management 8 | */ 9 | 10 | // Re-export main classes and functions 11 | export { MCPClientManager, createMCPManager, createTransport } from './client.js'; 12 | export { 13 | loadMCPConfiguration, 14 | loadMCPConfigurationFromPath, 15 | parseEnabledServers, 16 | createSampleConfig, 17 | saveConfig 18 | } from './config.js'; 19 | export { 20 | 
MCPXmlBridge, 21 | mcpToolToXmlDefinition, 22 | parseXmlMcpToolCall, 23 | parseHybridXmlToolCall, 24 | createHybridSystemMessage 25 | } from './xmlBridge.js'; 26 | 27 | // Import for default export 28 | import { MCPClientManager, createMCPManager, createTransport } from './client.js'; 29 | import { 30 | loadMCPConfiguration, 31 | loadMCPConfigurationFromPath, 32 | parseEnabledServers, 33 | createSampleConfig, 34 | saveConfig 35 | } from './config.js'; 36 | import { 37 | MCPXmlBridge, 38 | mcpToolToXmlDefinition, 39 | parseXmlMcpToolCall, 40 | parseHybridXmlToolCall, 41 | createHybridSystemMessage 42 | } from './xmlBridge.js'; 43 | 44 | // Default export for convenience 45 | export default { 46 | // Client 47 | MCPClientManager, 48 | createMCPManager, 49 | createTransport, 50 | 51 | // Config 52 | loadMCPConfiguration, 53 | loadMCPConfigurationFromPath, 54 | parseEnabledServers, 55 | createSampleConfig, 56 | saveConfig, 57 | 58 | // XML Bridge 59 | MCPXmlBridge, 60 | mcpToolToXmlDefinition, 61 | parseXmlMcpToolCall, 62 | parseHybridXmlToolCall, 63 | createHybridSystemMessage 64 | }; -------------------------------------------------------------------------------- /examples/cache_demo.rs: -------------------------------------------------------------------------------- 1 | use probe_code::language::parser::parse_file_for_code_blocks; 2 | use std::collections::HashSet; 3 | 4 | fn main() { 5 | // Set up test content 6 | let content = r#" 7 | fn test_function() { 8 | // This is a comment 9 | let x = 42; 10 | println!("Hello, world!"); 11 | } 12 | 13 | struct TestStruct { 14 | field1: i32, 15 | field2: String, 16 | } 17 | "#; 18 | 19 | // Create a set of line numbers to extract 20 | let mut line_numbers = HashSet::new(); 21 | line_numbers.insert(3); // Comment line 22 | line_numbers.insert(4); // Code line 23 | line_numbers.insert(8); // Struct field line 24 | 25 | println!("First call (should be a cache miss):"); 26 | let result1 = parse_file_for_code_blocks(content, 
"rs", &line_numbers, true, None).unwrap(); 27 | println!("Found {} code blocks", result1.len()); 28 | 29 | println!("\nSecond call (should be a cache hit):"); 30 | let result2 = parse_file_for_code_blocks(content, "rs", &line_numbers, true, None).unwrap(); 31 | println!("Found {} code blocks", result2.len()); 32 | 33 | println!("\nThird call with different allow_tests flag (should be a cache miss):"); 34 | let result3 = parse_file_for_code_blocks(content, "rs", &line_numbers, false, None).unwrap(); 35 | println!("Found {} code blocks", result3.len()); 36 | 37 | println!("\nFourth call with different content (should be a cache miss):"); 38 | let content2 = r#" 39 | fn different_function() { 40 | // This is a different comment 41 | let y = 100; 42 | } 43 | "#; 44 | let result4 = parse_file_for_code_blocks(content2, "rs", &line_numbers, true, None).unwrap(); 45 | println!("Found {} code blocks", result4.len()); 46 | } 47 | -------------------------------------------------------------------------------- /examples/reranker/test_bert_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "🔍 REAL BERT RERANKER - QUALITY AND PERFORMANCE ANALYSIS" 4 | echo "========================================================" 5 | echo "" 6 | 7 | cd /Users/leonidbugaev/go/src/code-search/examples/reranker 8 | 9 | echo "=== Performance Analysis ===" 10 | echo "" 11 | 12 | echo "📊 Small scale (10 docs):" 13 | ./target/release/benchmark --query "search algorithm" --num-docs 10 --iterations 3 --batch-size 5 14 | 15 | echo "" 16 | echo "📊 Medium scale (25 docs):" 17 | ./target/release/benchmark --query "async rust programming" --num-docs 25 --iterations 2 --batch-size 10 18 | 19 | echo "" 20 | echo "📊 Large scale (50 docs):" 21 | ./target/release/benchmark --query "machine learning optimization" --num-docs 50 --iterations 1 --batch-size 25 22 | 23 | echo "" 24 | echo "=== Comparison: Demo vs Real BERT ===" 25 | echo "" 26 | 
27 | echo "🚀 Demo reranker (mock algorithm):" 28 | ./target/release/benchmark --demo --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25 29 | 30 | echo "" 31 | echo "🧠 Real BERT reranker:" 32 | ./target/release/benchmark --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25 33 | 34 | echo "" 35 | echo "========================================================" 36 | echo "✅ REAL BERT PERFORMANCE ANALYSIS COMPLETE" 37 | echo "" 38 | echo "KEY FINDINGS:" 39 | echo "• Real BERT: ~7-8 docs/second (semantic understanding)" 40 | echo "• Demo reranker: ~80,000+ docs/second (simple matching)" 41 | echo "• BERT model loading: ~0.04-0.06 seconds" 42 | echo "• Per-document processing: ~125-130ms" 43 | echo "• Memory usage: ~45MB model + runtime overhead" 44 | echo "========================================================" -------------------------------------------------------------------------------- /examples/reranker/simple_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Minimal test using sentence-transformers which handles dependencies better 4 | """ 5 | 6 | try: 7 | from sentence_transformers import CrossEncoder 8 | print("✓ sentence-transformers imported successfully") 9 | except ImportError: 10 | print("Installing sentence-transformers...") 11 | import subprocess 12 | subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"]) 13 | from sentence_transformers import CrossEncoder 14 | 15 | # Test inputs 16 | queries = [ 17 | "how does authentication work", 18 | "foobar random nonsense gibberish" 19 | ] 20 | 21 | document = """Authentication is the process of verifying the identity of a user, device, or system. 
22 | In web applications, authentication typically involves checking credentials like usernames 23 | and passwords against a database.""" 24 | 25 | # Load model 26 | print("Loading cross-encoder model...") 27 | model = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2', max_length=512) 28 | print("Model loaded!") 29 | 30 | # Score pairs 31 | print("\nScoring query-document pairs:") 32 | print("-" * 50) 33 | 34 | scores = [] 35 | for query in queries: 36 | score = model.predict([(query, document)])[0] 37 | scores.append(score) 38 | print(f"Query: '{query}'") 39 | print(f"Score: {score:.6f}\n") 40 | 41 | # Compare 42 | print("Comparison:") 43 | print(f"Relevant query score: {scores[0]:.6f}") 44 | print(f"Nonsense query score: {scores[1]:.6f}") 45 | print(f"Difference: {scores[0] - scores[1]:.6f}") 46 | 47 | if scores[0] > scores[1] + 0.1: 48 | print("\n✓ Good: Relevant query scores higher") 49 | else: 50 | print("\n⚠ Poor discrimination between queries") -------------------------------------------------------------------------------- /npm/src/agent/storage/StorageAdapter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Base class for storage adapters 3 | * Implement this interface to provide custom storage backends for ProbeAgent history 4 | */ 5 | export class StorageAdapter { 6 | /** 7 | * Load conversation history for a session 8 | * @param {string} sessionId - Session identifier 9 | * @returns {Promise>} Array of message objects with {role, content, ...} 10 | */ 11 | async loadHistory(sessionId) { 12 | throw new Error('StorageAdapter.loadHistory() must be implemented by subclass'); 13 | } 14 | 15 | /** 16 | * Save a message to storage 17 | * @param {string} sessionId - Session identifier 18 | * @param {Object} message - Message object { role, content, ... 
} 19 | * @returns {Promise} 20 | */ 21 | async saveMessage(sessionId, message) { 22 | throw new Error('StorageAdapter.saveMessage() must be implemented by subclass'); 23 | } 24 | 25 | /** 26 | * Clear history for a session 27 | * @param {string} sessionId - Session identifier 28 | * @returns {Promise} 29 | */ 30 | async clearHistory(sessionId) { 31 | throw new Error('StorageAdapter.clearHistory() must be implemented by subclass'); 32 | } 33 | 34 | /** 35 | * Get session metadata (optional) 36 | * @param {string} sessionId - Session identifier 37 | * @returns {Promise} Session metadata or null 38 | */ 39 | async getSessionMetadata(sessionId) { 40 | return null; 41 | } 42 | 43 | /** 44 | * Update session activity timestamp (optional) 45 | * @param {string} sessionId - Session identifier 46 | * @returns {Promise} 47 | */ 48 | async updateSessionActivity(sessionId) { 49 | // Optional - implement if you want to track session activity 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/chat/test-github-context.txt: -------------------------------------------------------------------------------- 1 | 2 |
<![CDATA[Image test]]> 3 | 4 | https://private-user-images.githubusercontent.com/221343105/467536716-0b9bb81b-8b8a-4b00-aae7-0d8d109e28ce.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTI3NjEwNTgsIm5iZiI6MTc1Mjc2MDc1OCwicGF0aCI6Ii8yMjEzNDMxMDUvNDY3NTM2NzE2LTBiOWJiODFiLThiOGEtNGIwMC1hYWU3LTBkOGQxMDllMjhjZS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNzE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDcxN1QxMzU5MThaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT0xMzJjOWJlZGEzZmEyZjEyNWQxNDRkZDI5Y2RkNTdhZDk2ZWExMzZhY2RlYTI0M2M2MjlkMTEyYTQzYWE0ODY1JlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.VanGvZZNDqamBAjEBaCjsMJusej89OjHxwY8O2R72i4 5 | 6 | Probe - I want you to tell what you see on each image.]]>
7 | 8 | buger2025-07-21T08:41:45Zbuger2025-07-22T15:42:14Z 9 | 10 |
11 | 12 | -------------------------------------------------------------------------------- /tests/schemas/xml_output_schema.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/search/term_exceptions.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::collections::HashSet; 3 | 4 | /// Static set of special case terms that should be treated as exceptions 5 | /// These terms are used in compound word detection and special handling 6 | pub static EXCEPTION_TERMS: Lazy> = Lazy::new(|| { 7 | vec![ 8 | // Network and security related terms 9 | "network", 10 | "firewall", 11 | // Common technology terms 12 | "rpc", 13 | "api", 14 | "http", 15 | "json", 16 | "xml", 17 | "html", 18 | "css", 19 | "js", 20 | "db", 21 | "sql", 22 | // Common software architecture terms 23 | "handler", 24 | "controller", 25 | "service", 26 | "repository", 27 | "manager", 28 | "factory", 29 | "provider", 30 | "client", 31 | "server", 32 | "config", 33 | "util", 34 | "helper", 35 | "storage", 36 | "cache", 37 | "queue", 38 | "worker", 39 | "job", 40 | "task", 41 | "event", 42 | "listener", 43 | "callback", 44 | "middleware", 45 | "filter", 46 | "validator", 47 | "converter", 48 | "transformer", 49 | "parser", 50 | "serializer", 51 | "deserializer", 52 | "encoder", 53 | "decoder", 54 | "reader", 55 | "writer", 56 | // Common programming workflow terms 57 | "workflow", 58 | ] 59 | .into_iter() 60 | .map(String::from) 61 | .collect() 62 | }); 63 | 64 | /// Checks if a term is in the exception list 65 | pub fn is_exception_term(term: &str) -> bool { 66 | EXCEPTION_TERMS.contains(&term.to_lowercase()) 67 | } 68 | 
-------------------------------------------------------------------------------- /npm/src/tools/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Main tools module 3 | * @module tools 4 | */ 5 | 6 | // Export Vercel AI SDK tool generators 7 | export { searchTool, queryTool, extractTool, delegateTool } from './vercel.js'; 8 | export { bashTool } from './bash.js'; 9 | export { editTool, createTool } from './edit.js'; 10 | 11 | // Export LangChain tools 12 | export { createSearchTool, createQueryTool, createExtractTool } from './langchain.js'; 13 | 14 | // Export common schemas 15 | export { 16 | searchSchema, 17 | querySchema, 18 | extractSchema, 19 | delegateSchema, 20 | bashSchema, 21 | delegateDescription, 22 | delegateToolDefinition, 23 | bashDescription, 24 | bashToolDefinition, 25 | attemptCompletionSchema, 26 | attemptCompletionToolDefinition 27 | } from './common.js'; 28 | 29 | // Export edit and create schemas 30 | export { 31 | editSchema, 32 | createSchema, 33 | editDescription, 34 | createDescription, 35 | editToolDefinition, 36 | createToolDefinition 37 | } from './edit.js'; 38 | 39 | // Export system message 40 | export { DEFAULT_SYSTEM_MESSAGE } from './system-message.js'; 41 | 42 | // For backward compatibility, create and export pre-configured tools 43 | import { searchTool as searchToolGenerator, queryTool as queryToolGenerator, extractTool as extractToolGenerator, delegateTool as delegateToolGenerator } from './vercel.js'; 44 | import { bashTool as bashToolGenerator } from './bash.js'; 45 | import { DEFAULT_SYSTEM_MESSAGE } from './system-message.js'; 46 | 47 | // Create default tool instances (for backward compatibility) 48 | const tools = { 49 | searchTool: searchToolGenerator(), 50 | queryTool: queryToolGenerator(), 51 | extractTool: extractToolGenerator(), 52 | delegateTool: delegateToolGenerator(), 53 | bashTool: bashToolGenerator(), 54 | DEFAULT_SYSTEM_MESSAGE 55 | }; 56 | 57 | export { 
tools }; -------------------------------------------------------------------------------- /examples/reranker/rust_bert_test/README.md: -------------------------------------------------------------------------------- 1 | # Rust-BERT Cross-Encoder Test 2 | 3 | This example tests cross-encoder functionality using rust-bert to compare with our Candle implementation. 4 | 5 | ## Setup 6 | 7 | 1. Install libtorch (required by rust-bert): 8 | - macOS: `brew install pytorch` 9 | - Linux: Download from https://pytorch.org/get-started/locally/ 10 | 11 | 2. Set environment variables: 12 | ```bash 13 | export LIBTORCH=/usr/local/opt/pytorch # macOS with Homebrew 14 | # or 15 | export LIBTORCH=/path/to/libtorch # Linux/custom installation 16 | ``` 17 | 18 | 3. Build and run: 19 | ```bash 20 | cargo run --release 21 | ``` 22 | 23 | ## Model Conversion 24 | 25 | To use the TinyBERT model with rust-bert, you need to convert it to the .ot format: 26 | 27 | ```python 28 | # convert_model.py 29 | import torch 30 | from transformers import AutoModelForSequenceClassification 31 | 32 | model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/ms-marco-TinyBERT-L-2-v2') 33 | traced = torch.jit.trace(model, (torch.zeros(1, 512, dtype=torch.long),)) 34 | traced.save("rust_model.ot") 35 | ``` 36 | 37 | ## Notes 38 | 39 | - rust-bert expects models in TorchScript format (.ot files) 40 | - The sequence classification pipeline is designed for classification, not regression 41 | - For true cross-encoder scoring, you may need to modify the pipeline 42 | - This example demonstrates the approach but may not give identical results to Python 43 | 44 | ## Comparison with Candle 45 | 46 | Our Candle implementation: 47 | - Loads PyTorch .bin files directly 48 | - Implements cross-encoder architecture manually 49 | - Returns raw logits for scoring 50 | 51 | rust-bert approach: 52 | - Uses TorchScript format 53 | - Provides high-level pipelines 54 | - Returns classification labels with 
confidence scores -------------------------------------------------------------------------------- /npm/test-grep-simplified.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { grep } from './src/index.js'; 4 | 5 | async function testSimplifiedGrep() { 6 | console.log('Testing simplified grep API...\n'); 7 | 8 | try { 9 | // Test 1: Basic search (line numbers enabled by default) 10 | console.log('Test 1: Basic search for "export" in src directory'); 11 | const result1 = await grep({ 12 | pattern: 'export', 13 | paths: './src', 14 | lineNumbers: true // This should be the default 15 | }); 16 | console.log('First 5 lines:'); 17 | console.log(result1.split('\n').slice(0, 5).join('\n')); 18 | console.log('\n---\n'); 19 | 20 | // Test 2: Case-insensitive search 21 | console.log('Test 2: Case-insensitive search for "TODO"'); 22 | const result2 = await grep({ 23 | pattern: 'todo', 24 | paths: './src', 25 | ignoreCase: true, 26 | lineNumbers: true 27 | }); 28 | console.log('Result:'); 29 | console.log(result2); 30 | console.log('\n---\n'); 31 | 32 | // Test 3: Count matches 33 | console.log('Test 3: Count "function" occurrences'); 34 | const result3 = await grep({ 35 | pattern: 'function', 36 | paths: './src', 37 | count: true 38 | }); 39 | console.log('First 5 files:'); 40 | console.log(result3.split('\n').slice(0, 5).join('\n')); 41 | console.log('\n---\n'); 42 | 43 | // Test 4: Search with context 44 | console.log('Test 4: Search with 1 line of context'); 45 | const result4 = await grep({ 46 | pattern: 'export.*grep', 47 | paths: './src/index.js', 48 | context: 1, 49 | lineNumbers: true 50 | }); 51 | console.log('Result:'); 52 | console.log(result4); 53 | console.log('\n---\n'); 54 | 55 | console.log('✅ All simplified grep tests passed!'); 56 | } catch (error) { 57 | console.error('❌ Test failed:', error.message); 58 | console.error(error); 59 | process.exit(1); 60 | } 61 | } 62 | 63 | 
testSimplifiedGrep(); 64 | -------------------------------------------------------------------------------- /.github/workflows/vitepress-gh-pages.yml.disabled: -------------------------------------------------------------------------------- 1 | # DEPRECATED: This workflow has been disabled in favor of Cloudflare Pages deployment 2 | # The site is now deployed automatically via Cloudflare Pages integration 3 | # See site/wrangler.toml for the new deployment configuration 4 | # 5 | # .github/workflows/deploy.yml 6 | name: Build and Deploy VitePress Site (DISABLED) 7 | 8 | on: 9 | push: 10 | branches: ["main"] # Trigger on push to main branch 11 | workflow_dispatch: # Allow manual trigger from GitHub Actions tab 12 | 13 | # Sets permissions for the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | build-and-deploy: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 # Fetch all history for lastUpdated (optional) 32 | 33 | - name: Setup Node 34 | uses: actions/setup-node@v4 35 | with: 36 | node-version: 20 37 | # Temporarily disable cache to troubleshoot the issue 38 | 39 | - name: Install Dependencies 40 | run: cd site && npm install # Use npm install instead of npm ci to ensure package-lock.json is generated 41 | 42 | - name: Build VitePress Site 43 | run: cd site && npm run docs:build # Build the VitePress site in the site directory 44 | 45 | - name: Setup Pages 46 | uses: actions/configure-pages@v4 47 | 48 | - name: Upload Artifact 49 | uses: actions/upload-pages-artifact@v3 50 | with: 51 | path: ./site/.vitepress/dist # Upload the VitePress build output as the artifact 52 | 53 | - name: Deploy to GitHub Pages 54 | id: deployment 55 | uses: actions/deploy-pages@v4 56 | 
-------------------------------------------------------------------------------- /tests/lib_usage.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use probe_code::search::{perform_probe, SearchOptions}; 4 | use std::path::Path; 5 | 6 | #[test] 7 | fn test_search_functionality() { 8 | // Create search options 9 | let options = SearchOptions { 10 | path: Path::new("."), 11 | queries: &["function".to_string()], 12 | files_only: false, 13 | custom_ignores: &[], 14 | exclude_filenames: false, 15 | reranker: "bm25", 16 | frequency_search: true, 17 | exact: false, 18 | language: None, 19 | max_results: Some(5), 20 | max_bytes: None, 21 | max_tokens: None, 22 | allow_tests: true, 23 | no_merge: false, 24 | merge_threshold: None, 25 | dry_run: false, 26 | session: None, 27 | timeout: 30, 28 | question: None, 29 | no_gitignore: false, 30 | }; 31 | 32 | let results = perform_probe(&options).unwrap(); 33 | 34 | // Just check that we get some results 35 | assert!(!results.results.is_empty()); 36 | println!("Found {} results", results.results.len()); 37 | } 38 | 39 | #[test] 40 | fn test_query_functionality() { 41 | use probe_code::query::{perform_query, QueryOptions}; 42 | 43 | let options = QueryOptions { 44 | path: Path::new("."), 45 | pattern: "fn", 46 | language: Some("rust"), 47 | ignore: &[], 48 | allow_tests: true, 49 | max_results: Some(5), 50 | format: "text", 51 | no_gitignore: false, 52 | }; 53 | 54 | let matches = perform_query(&options).unwrap(); 55 | 56 | // Just check that we get some results 57 | assert!(!matches.is_empty()); 58 | println!("Found {} matches", matches.len()); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /npm/src/utils/symlink-utils.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Symlink resolution utilities for the probe package 3 | * @module utils/symlink-utils 4 | */ 5 
| 6 | import fs from 'fs'; 7 | import { promises as fsPromises } from 'fs'; 8 | 9 | /** 10 | * Get entry type following symlinks (async version) 11 | * 12 | * Uses fs.stat() which follows symlinks to get the actual target type. 13 | * Falls back to dirent type if stat fails (e.g., broken symlink). 14 | * 15 | * @param {fs.Dirent} entry - Directory entry from readdir 16 | * @param {string} fullPath - Full path to the entry 17 | * @returns {Promise<{isFile: boolean, isDirectory: boolean, size: number}>} 18 | */ 19 | export async function getEntryType(entry, fullPath) { 20 | try { 21 | const stats = await fsPromises.stat(fullPath); 22 | return { 23 | isFile: stats.isFile(), 24 | isDirectory: stats.isDirectory(), 25 | size: stats.size 26 | }; 27 | } catch { 28 | // Fall back to dirent type if stat fails (e.g., broken symlink) 29 | return { 30 | isFile: entry.isFile(), 31 | isDirectory: entry.isDirectory(), 32 | size: 0 33 | }; 34 | } 35 | } 36 | 37 | /** 38 | * Get entry type following symlinks (sync version) 39 | * 40 | * Uses fs.statSync() which follows symlinks to get the actual target type. 41 | * Falls back to dirent type if stat fails (e.g., broken symlink). 
42 | * 43 | * @param {fs.Dirent} entry - Directory entry from readdir 44 | * @param {string} fullPath - Full path to the entry 45 | * @returns {{isFile: boolean, isDirectory: boolean, size: number}} 46 | */ 47 | export function getEntryTypeSync(entry, fullPath) { 48 | try { 49 | const stats = fs.statSync(fullPath); 50 | return { 51 | isFile: stats.isFile(), 52 | isDirectory: stats.isDirectory(), 53 | size: stats.size 54 | }; 55 | } catch { 56 | // Fall back to dirent type if stat fails (e.g., broken symlink) 57 | return { 58 | isFile: entry.isFile(), 59 | isDirectory: entry.isDirectory(), 60 | size: 0 61 | }; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /npm/src/agent/engines/vercel.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Vercel AI SDK Engine - wraps existing ProbeAgent logic 3 | * This maintains full backward compatibility 4 | */ 5 | 6 | import { streamText } from 'ai'; 7 | 8 | /** 9 | * Create a Vercel AI SDK engine 10 | * @param {Object} agent - The ProbeAgent instance 11 | * @returns {Object} Engine interface 12 | */ 13 | export function createVercelEngine(agent) { 14 | return { 15 | /** 16 | * Query the model using existing Vercel AI SDK implementation 17 | * @param {string} prompt - The prompt to send 18 | * @param {Object} options - Additional options 19 | * @returns {AsyncIterable} Response stream 20 | */ 21 | async *query(prompt, options = {}) { 22 | // Build messages array 23 | const messages = [ 24 | ...agent.history, 25 | { role: 'user', content: prompt } 26 | ]; 27 | 28 | // Use existing streamText with retry and fallback 29 | const result = await agent.streamTextWithRetryAndFallback({ 30 | model: agent.provider(agent.model), 31 | messages, 32 | maxTokens: options.maxTokens || agent.maxResponseTokens, 33 | temperature: options.temperature, 34 | tools: options.tools, 35 | toolChoice: options.toolChoice, 36 | experimental_telemetry: 
options.telemetry 37 | }); 38 | 39 | // Stream the response 40 | for await (const chunk of result.textStream) { 41 | yield { type: 'text', content: chunk }; 42 | } 43 | 44 | // Handle tool calls if any 45 | if (result.toolCalls && result.toolCalls.length > 0) { 46 | yield { type: 'tool_calls', toolCalls: result.toolCalls }; 47 | } 48 | 49 | // Handle finish reason 50 | if (result.finishReason) { 51 | yield { type: 'finish', reason: result.finishReason }; 52 | } 53 | }, 54 | 55 | /** 56 | * Optional cleanup 57 | */ 58 | async close() { 59 | // Nothing to cleanup for Vercel AI 60 | } 61 | }; 62 | } -------------------------------------------------------------------------------- /src/search/test_patterns.rs: -------------------------------------------------------------------------------- 1 | use probe_code::search::query::{preprocess_query, create_term_patterns}; 2 | use std::collections::HashSet; 3 | 4 | #[test] 5 | fn test_grouped_patterns() { 6 | // Test with "ip" and "whitelisting" 7 | let term_pairs = vec![ 8 | ("ip".to_string(), "ip".to_string()), 9 | ("whitelisting".to_string(), "whitelist".to_string()), 10 | ]; 11 | 12 | let patterns = create_term_patterns(&term_pairs); 13 | 14 | // Print the patterns for inspection 15 | println!("Generated patterns:"); 16 | for (pattern, indices) in &patterns { 17 | println!("Pattern: {pattern:?}, Indices: {indices:?}"); 18 | } 19 | 20 | // Verify we have the expected number of patterns 21 | // 1 pattern for each term (with combined boundaries) + 1 pattern for combinations 22 | assert_eq!(patterns.len(), 3); 23 | 24 | // Verify the first pattern is for "ip" with both boundaries 25 | let ip_pattern = patterns.iter().find(|(_, indices)| indices.len() == 1 && indices.contains(&0)); 26 | assert!(ip_pattern.is_some()); 27 | let (ip_pattern, _) = ip_pattern.unwrap(); 28 | assert!(ip_pattern.contains("\\bip|ip\\b")); 29 | 30 | // Verify the second pattern is for "whitelisting|whitelist" with both boundaries 31 | let 
whitelist_pattern = patterns.iter().find(|(_, indices)| indices.len() == 1 && indices.contains(&1)); 32 | assert!(whitelist_pattern.is_some()); 33 | let (whitelist_pattern, _) = whitelist_pattern.unwrap(); 34 | assert!(whitelist_pattern.contains("(whitelisting|whitelist)")); 35 | 36 | // Verify the third pattern contains all combinations 37 | let combo_pattern = patterns.iter().find(|(_, indices)| indices.len() == 2); 38 | assert!(combo_pattern.is_some()); 39 | let (combo_pattern, _) = combo_pattern.unwrap(); 40 | assert!(combo_pattern.contains("(")); 41 | assert!(combo_pattern.contains("|")); 42 | assert!(combo_pattern.contains("ipwhitelisting")); 43 | assert!(combo_pattern.contains("ipwhitelist")); 44 | } 45 | -------------------------------------------------------------------------------- /src/language/block_handling.rs: -------------------------------------------------------------------------------- 1 | /// Function to merge overlapping code blocks 2 | #[cfg(test)] 3 | pub fn merge_code_blocks( 4 | code_blocks: Vec, 5 | ) -> Vec { 6 | let mut merged_blocks: Vec = Vec::new(); 7 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 8 | 9 | for block in code_blocks { 10 | if let Some(last) = merged_blocks.last_mut() { 11 | // Use a consistent threshold of 10 lines for all block types 12 | let threshold = 10; 13 | 14 | if block.start_row <= last.end_row + threshold { 15 | if debug_mode { 16 | println!( 17 | "DEBUG: Merging blocks: {} ({}-{}) with {} ({}-{})", 18 | last.node_type, 19 | last.start_row + 1, 20 | last.end_row + 1, 21 | block.node_type, 22 | block.start_row + 1, 23 | block.end_row + 1 24 | ); 25 | } 26 | last.end_row = last.end_row.max(block.end_row); 27 | last.end_byte = last.end_byte.max(block.end_byte); 28 | last.start_row = last.start_row.min(block.start_row); 29 | last.start_byte = last.start_byte.min(block.start_byte); 30 | continue; 31 | } 32 | } 33 | merged_blocks.push(block); 34 | } 35 | 36 | if debug_mode { 37 | println!( 38 | 
"DEBUG: After merging: {len} blocks", 39 | len = merged_blocks.len() 40 | ); 41 | for (i, block) in merged_blocks.iter().enumerate() { 42 | println!( 43 | "DEBUG: Block {}: type={}, lines={}-{}", 44 | i + 1, 45 | block.node_type, 46 | block.start_row + 1, 47 | block.end_row + 1 48 | ); 49 | } 50 | } 51 | merged_blocks 52 | } 53 | -------------------------------------------------------------------------------- /src/language/factory.rs: -------------------------------------------------------------------------------- 1 | use probe_code::language::c::CLanguage; 2 | use probe_code::language::cpp::CppLanguage; 3 | use probe_code::language::csharp::CSharpLanguage; 4 | use probe_code::language::go::GoLanguage; 5 | use probe_code::language::html::HtmlLanguage; 6 | use probe_code::language::java::JavaLanguage; 7 | use probe_code::language::javascript::JavaScriptLanguage; 8 | use probe_code::language::language_trait::LanguageImpl; 9 | use probe_code::language::markdown::MarkdownLanguage; 10 | use probe_code::language::php::PhpLanguage; 11 | use probe_code::language::python::PythonLanguage; 12 | use probe_code::language::ruby::RubyLanguage; 13 | use probe_code::language::rust::RustLanguage; 14 | use probe_code::language::swift::SwiftLanguage; 15 | use probe_code::language::typescript::TypeScriptLanguage; 16 | use probe_code::language::yaml::YamlLanguage; 17 | 18 | /// Factory function to get the appropriate language implementation based on file extension 19 | pub fn get_language_impl(extension: &str) -> Option> { 20 | match extension { 21 | "rs" => Some(Box::new(RustLanguage::new())), 22 | "js" | "jsx" => Some(Box::new(JavaScriptLanguage::new())), 23 | "ts" => Some(Box::new(TypeScriptLanguage::new_typescript())), 24 | "tsx" => Some(Box::new(TypeScriptLanguage::new_tsx())), 25 | "py" => Some(Box::new(PythonLanguage::new())), 26 | "go" => Some(Box::new(GoLanguage::new())), 27 | "c" | "h" => Some(Box::new(CLanguage::new())), 28 | "cpp" | "cc" | "cxx" | "hpp" | "hxx" => 
Some(Box::new(CppLanguage::new())), 29 | "java" => Some(Box::new(JavaLanguage::new())), 30 | "rb" => Some(Box::new(RubyLanguage::new())), 31 | "php" => Some(Box::new(PhpLanguage::new())), 32 | "swift" => Some(Box::new(SwiftLanguage::new())), 33 | "cs" => Some(Box::new(CSharpLanguage::new())), 34 | "html" | "htm" => Some(Box::new(HtmlLanguage::new())), 35 | "md" | "markdown" => Some(Box::new(MarkdownLanguage::new())), 36 | "yaml" | "yml" => Some(Box::new(YamlLanguage::new())), 37 | _ => None, 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/language/c.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for C 5 | pub struct CLanguage; 6 | 7 | impl Default for CLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl CLanguage { 14 | pub fn new() -> Self { 15 | CLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for CLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_c::LANGUAGE.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "c" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" | "declaration" | "struct_specifier" | "enum_specifier" 32 | ) 33 | } 34 | 35 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 36 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 37 | let node_type = node.kind(); 38 | 39 | // C: Check function_definition nodes with test in the name 40 | if node_type == "function_definition" { 41 | let mut cursor = node.walk(); 42 | for child in node.children(&mut cursor) { 43 | if child.kind() == "function_declarator" { 44 | let mut subcursor = child.walk(); 45 | for subchild in child.children(&mut subcursor) { 46 | if subchild.kind() == 
"identifier" { 47 | let name = subchild.utf8_text(source).unwrap_or(""); 48 | if name.contains("test") || name.contains("Test") { 49 | if debug_mode { 50 | println!("DEBUG: Test node detected (C): test function"); 51 | } 52 | return true; 53 | } 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | false 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | probe: 5 | image: buger/probe:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | args: 10 | VERSION: ${VERSION:-dev} 11 | BUILD_DATE: ${BUILD_DATE:-} 12 | VCS_REF: ${VCS_REF:-} 13 | volumes: 14 | - ./:/workspace:ro 15 | working_dir: /workspace 16 | command: --help 17 | 18 | probe-chat-cli: 19 | image: buger/probe-chat:latest 20 | build: 21 | context: ./examples/chat 22 | dockerfile: Dockerfile 23 | args: 24 | VERSION: ${VERSION:-dev} 25 | BUILD_DATE: ${BUILD_DATE:-} 26 | VCS_REF: ${VCS_REF:-} 27 | environment: 28 | - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} 29 | - OPENAI_API_KEY=${OPENAI_API_KEY} 30 | - ALLOWED_FOLDERS=${ALLOWED_FOLDERS:-} 31 | volumes: 32 | - ./:/workspace:ro 33 | working_dir: /workspace 34 | stdin_open: true 35 | tty: true 36 | 37 | probe-chat-web: 38 | image: buger/probe-chat:latest 39 | build: 40 | context: ./examples/chat 41 | dockerfile: Dockerfile 42 | args: 43 | VERSION: ${VERSION:-dev} 44 | BUILD_DATE: ${BUILD_DATE:-} 45 | VCS_REF: ${VCS_REF:-} 46 | environment: 47 | - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} 48 | - OPENAI_API_KEY=${OPENAI_API_KEY} 49 | - ALLOWED_FOLDERS=${ALLOWED_FOLDERS:-} 50 | volumes: 51 | - ./:/workspace:ro 52 | working_dir: /workspace 53 | ports: 54 | - "3000:3000" 55 | command: --web 56 | healthcheck: 57 | test: ["CMD", "curl", "-f", "http://localhost:3000/health"] 58 | interval: 30s 59 | timeout: 3s 60 | retries: 3 61 | start_period: 5s 62 | 63 | # Development profile for 
local builds 64 | probe-dev: 65 | profiles: 66 | - dev 67 | build: 68 | context: . 69 | dockerfile: Dockerfile 70 | cache_from: 71 | - buger/probe:latest 72 | volumes: 73 | - ./:/workspace:ro 74 | - cargo-cache:/usr/local/cargo 75 | working_dir: /workspace 76 | command: --help 77 | 78 | volumes: 79 | cargo-cache: -------------------------------------------------------------------------------- /examples/chat/implement/backends/registry.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Backend registry for automatic discovery and registration 3 | * @module registry 4 | */ 5 | 6 | import AiderBackend from './AiderBackend.js'; 7 | import ClaudeCodeBackend from './ClaudeCodeBackend.js'; 8 | 9 | /** 10 | * Available backend classes 11 | */ 12 | const AVAILABLE_BACKENDS = { 13 | aider: AiderBackend, 14 | 'claude-code': ClaudeCodeBackend 15 | }; 16 | 17 | /** 18 | * Get all available backend classes 19 | * @returns {Object} 20 | */ 21 | function getAvailableBackends() { 22 | return { ...AVAILABLE_BACKENDS }; 23 | } 24 | 25 | /** 26 | * Create a backend instance by name 27 | * @param {string} name - Backend name 28 | * @returns {BaseBackend|null} 29 | */ 30 | function createBackend(name) { 31 | const BackendClass = AVAILABLE_BACKENDS[name]; 32 | if (!BackendClass) { 33 | return null; 34 | } 35 | 36 | return new BackendClass(); 37 | } 38 | 39 | /** 40 | * Register a custom backend class 41 | * @param {string} name - Backend name 42 | * @param {typeof BaseBackend} BackendClass - Backend class 43 | */ 44 | function registerBackend(name, BackendClass) { 45 | AVAILABLE_BACKENDS[name] = BackendClass; 46 | } 47 | 48 | /** 49 | * Get backend metadata 50 | * @param {string} name - Backend name 51 | * @returns {Object|null} 52 | */ 53 | function getBackendMetadata(name) { 54 | const backend = createBackend(name); 55 | if (!backend) { 56 | return null; 57 | } 58 | 59 | return { 60 | name: backend.name, 61 | version: backend.version, 62 | 
description: backend.getDescription(), 63 | capabilities: backend.getCapabilities(), 64 | dependencies: backend.getRequiredDependencies() 65 | }; 66 | } 67 | 68 | /** 69 | * List all registered backend names 70 | * @returns {string[]} 71 | */ 72 | function listBackendNames() { 73 | return Object.keys(AVAILABLE_BACKENDS); 74 | } 75 | 76 | export { 77 | getAvailableBackends, 78 | createBackend, 79 | registerBackend, 80 | getBackendMetadata, 81 | listBackendNames, 82 | // Export backend classes for direct use 83 | AiderBackend, 84 | ClaudeCodeBackend 85 | }; -------------------------------------------------------------------------------- /npm/src/utils/path-validation.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Path validation utilities for the probe package 3 | * @module utils/path-validation 4 | */ 5 | 6 | import path from 'path'; 7 | import { promises as fs } from 'fs'; 8 | 9 | /** 10 | * Validates and normalizes a path to be used as working directory (cwd). 11 | * 12 | * Security considerations: 13 | * - Normalizes path to resolve '..' and '.' components 14 | * - Returns absolute path to prevent ambiguity 15 | * - Does NOT restrict access to specific directories (that's the responsibility 16 | * of higher-level components like ProbeAgent with allowedFolders) 17 | * 18 | * @param {string} inputPath - The path to validate 19 | * @param {string} [defaultPath] - Default path to use if inputPath is not provided 20 | * @returns {Promise} Normalized absolute path 21 | * @throws {Error} If the path is invalid or doesn't exist 22 | */ 23 | export async function validateCwdPath(inputPath, defaultPath = process.cwd()) { 24 | // Use default if not provided 25 | const targetPath = inputPath || defaultPath; 26 | 27 | // Normalize and resolve to absolute path 28 | // This handles '..' 
traversal and makes the path unambiguous 29 | const normalizedPath = path.normalize(path.resolve(targetPath)); 30 | 31 | // Verify the path exists and is a directory 32 | try { 33 | const stats = await fs.stat(normalizedPath); 34 | if (!stats.isDirectory()) { 35 | throw new Error(`Path is not a directory: ${normalizedPath}`); 36 | } 37 | } catch (error) { 38 | if (error.code === 'ENOENT') { 39 | throw new Error(`Path does not exist: ${normalizedPath}`); 40 | } 41 | throw error; 42 | } 43 | 44 | return normalizedPath; 45 | } 46 | 47 | /** 48 | * Validates a path option without requiring it to exist. 49 | * Use this for paths that might be created or are optional. 50 | * 51 | * @param {string} inputPath - The path to validate 52 | * @param {string} [defaultPath] - Default path to use if inputPath is not provided 53 | * @returns {string} Normalized absolute path 54 | */ 55 | export function normalizePath(inputPath, defaultPath = process.cwd()) { 56 | const targetPath = inputPath || defaultPath; 57 | return path.normalize(path.resolve(targetPath)); 58 | } 59 | -------------------------------------------------------------------------------- /examples/reranker/MODELS.md: -------------------------------------------------------------------------------- 1 | # MS-MARCO Cross-Encoder Models 2 | 3 | ## Available Models 4 | 5 | ### 1. TinyBERT-L-2-v2 (`ms-marco-tinybert`) 6 | - **Parameters**: 4.4M 7 | - **Layers**: 2 8 | - **Hidden Size**: 128 9 | - **Performance**: Fast but limited discrimination 10 | - **Use Case**: Quick reranking when speed is critical 11 | 12 | ### 2. MiniLM-L-6-v2 (`ms-marco-minilm-l6`) 13 | - **Parameters**: 22.7M 14 | - **Layers**: 6 15 | - **Hidden Size**: 384 16 | - **Performance**: Good balance of speed and accuracy 17 | - **Use Case**: Recommended for most applications 18 | 19 | ### 3. 
MiniLM-L-12-v2 (`ms-marco-minilm-l12`) 20 | - **Parameters**: 33.4M 21 | - **Layers**: 12 22 | - **Hidden Size**: 384 23 | - **Performance**: Best accuracy, slower 24 | - **Use Case**: When accuracy is more important than speed 25 | 26 | ## Performance Comparison 27 | 28 | Based on MS MARCO evaluation: 29 | 30 | | Model | MRR@10 | Params | Speed (V100) | 31 | |-------|--------|--------|--------------| 32 | | TinyBERT-L-2 | 0.312 | 4.4M | ~9000 docs/sec | 33 | | MiniLM-L-6 | 0.384 | 22.7M | ~2800 docs/sec | 34 | | MiniLM-L-12 | 0.391 | 33.4M | ~960 docs/sec | 35 | 36 | ## Usage 37 | 38 | ```bash 39 | # Download models 40 | ./download_models.sh 41 | 42 | # Use in probe 43 | probe search "query" . --reranker ms-marco-minilm-l6 --question "natural language question" 44 | ``` 45 | 46 | ## Model Architecture 47 | 48 | All models use the same cross-encoder architecture: 49 | 1. Input: `[CLS] query [SEP] document [SEP]` 50 | 2. BERT encoder processes the concatenated input 51 | 3. [CLS] token representation is passed through a linear classifier 52 | 4. Output: Single relevance score (raw logit) 53 | 54 | ## Recommendations 55 | 56 | - **Start with MiniLM-L-6**: It provides much better discrimination than TinyBERT while still being reasonably fast 57 | - **Use TinyBERT only if**: You need maximum speed and can tolerate lower accuracy 58 | - **Use MiniLM-L-12 when**: You need the best possible ranking quality 59 | 60 | ## Token Limits 61 | 62 | All models support up to 512 tokens, which is split between: 63 | - Query: typically 10-50 tokens 64 | - Document: remaining tokens (460-500) 65 | 66 | Documents are truncated if they exceed the limit. 
-------------------------------------------------------------------------------- /examples/reranker/MODEL_COMPARISON.md: -------------------------------------------------------------------------------- 1 | # Model Comparison Results 2 | 3 | ## Summary 4 | 5 | We successfully added support for two additional MS-MARCO cross-encoder models: 6 | - `ms-marco-minilm-l6` (22.7M parameters) 7 | - `ms-marco-minilm-l12` (33.4M parameters) 8 | 9 | ## Test Results 10 | 11 | ### TinyBERT-L-2 (4.4M params) 12 | With different questions, the top 3 results were **identical**, showing poor discrimination. 13 | 14 | ### MiniLM-L-6 (22.7M params) 15 | With different questions, we see **significant differences** in the top 10 results: 16 | 17 | **Relevant Question**: "how does authentication work" 18 | - TOKENIZATION_GUIDE.md appears first (contains auth examples) 19 | - Different ordering of results 20 | - Some unique results that don't appear with nonsense query 21 | 22 | **Nonsense Question**: "foobar random nonsense gibberish" 23 | - Different top result (README.md) 24 | - Several different files in top 10 (cli-mode.md, output-formats.md, advanced-cli.md) 25 | - Different ordering throughout 26 | 27 | ## Usage 28 | 29 | ```bash 30 | # TinyBERT (fastest, least accurate) 31 | probe search "auth" . --reranker ms-marco-tinybert --question "how does auth work" 32 | 33 | # MiniLM-L6 (balanced - RECOMMENDED) 34 | probe search "auth" . --reranker ms-marco-minilm-l6 --question "how does auth work" 35 | 36 | # MiniLM-L12 (most accurate, slower) 37 | probe search "auth" . --reranker ms-marco-minilm-l12 --question "how does auth work" 38 | ``` 39 | 40 | ## Performance 41 | 42 | Typical search times on the test repository: 43 | - TinyBERT: ~1.1s 44 | - MiniLM-L6: ~15.5s 45 | - MiniLM-L12: ~22s (estimated) 46 | 47 | ## Recommendations 48 | 49 | 1. **Use MiniLM-L6 as default** for BERT reranking - it provides much better semantic understanding 50 | 2. 
// Build script: bundles the agent entry point (src/agent/index.js) into a
// single executable ESM file at build/agent/index.js using esbuild.
const esbuild = require('esbuild');
const path = require('path');
const fs = require('fs');

// Bundles the agent with esbuild and marks the output executable.
// On any failure the process exits with code 1 so npm scripts/CI fail loudly.
async function buildAgent() {
  try {
    console.log('Building agent...');

    // Ensure build directory exists
    const buildDir = path.resolve(__dirname, '..', 'build', 'agent');
    if (!fs.existsSync(buildDir)) {
      fs.mkdirSync(buildDir, { recursive: true });
    }

    const result = await esbuild.build({
      entryPoints: [path.resolve(__dirname, '..', 'src', 'agent', 'index.js')],
      bundle: true,
      outfile: path.resolve(buildDir, 'index.js'),
      platform: 'node',
      target: 'node18',
      format: 'esm',
      external: [
        // AI SDK packages - use dynamic requires, must be external
        '@modelcontextprotocol/sdk',
        '@ai-sdk/anthropic',
        '@ai-sdk/openai',
        '@ai-sdk/google',
        '@ai-sdk/amazon-bedrock',
        'ai',
        // Packages with dynamic requires
        'fs-extra',
        'tar',
        'axios',
        'dotenv',
        // Node.js built-in modules
        'fs',
        'path',
        'crypto',
        'util',
        'child_process',
        'stream',
        'events',
        'url',
        'os',
        'process'
        // Will bundle: glob, zod
      ],
      // Shebang line so the bundled file can be invoked directly as a CLI.
      banner: {
        js: '#!/usr/bin/env node'
      },
      minify: false, // Keep readable for debugging
      sourcemap: false,
      metafile: true, // enables the bundle-size analysis logged below
      logLevel: 'info'
    });

    // Make the output file executable
    fs.chmodSync(path.resolve(buildDir, 'index.js'), 0o755);

    console.log('Agent build completed successfully!');

    if (result.metafile) {
      // Optional: log build statistics
      const analysis = await esbuild.analyzeMetafile(result.metafile);
      console.log('Build analysis:');
      console.log(analysis);
    }

  } catch (error) {
    console.error('Agent build failed:', error);
    process.exit(1);
  }
}

buildAgent();
#!/bin/bash

# Script to download MS-MARCO cross-encoder models for local use

set -e

echo "=== MS-MARCO Model Downloader ==="
echo

# Base directory for models
MODEL_DIR="models"
mkdir -p "$MODEL_DIR"

# Download one model from HuggingFace.
#   $1 - HuggingFace repo name (e.g. cross-encoder/ms-marco-TinyBERT-L-2-v2)
#   $2 - local directory name under $MODEL_DIR
download_model() {
    local model_name=$1
    local model_dir=$2

    echo "Downloading $model_name..."
    mkdir -p "$MODEL_DIR/$model_dir"

    # Download essential files
    FILES=(
        "config.json"
        "tokenizer.json"
        "tokenizer_config.json"
        "vocab.txt"
        "pytorch_model.bin"
        "special_tokens_map.json"
    )

    for file in "${FILES[@]}"; do
        if [ -f "$MODEL_DIR/$model_dir/$file" ]; then
            echo "  ✓ $file already exists"
        else
            echo "  ⬇ Downloading $file..."
            # --fail makes curl exit non-zero on HTTP errors instead of saving
            # the error page as the model file; remove any partial download so
            # a later run does not mistake it for a valid file and skip it.
            curl -L --fail -o "$MODEL_DIR/$model_dir/$file" \
                "https://huggingface.co/$model_name/resolve/main/$file" 2>/dev/null || {
                rm -f "$MODEL_DIR/$model_dir/$file"
                echo "  ⚠ $file not found (might be optional)"
            }
        fi
    done

    echo "✓ $model_name download complete"
    echo
}

# Download models
echo "Downloading cross-encoder models..."
echo

# TinyBERT (4M params) - already have this
if [ -d "$MODEL_DIR/ms-marco-TinyBERT-L-2-v2" ]; then
    echo "✓ TinyBERT model already exists"
else
    download_model "cross-encoder/ms-marco-TinyBERT-L-2-v2" "ms-marco-TinyBERT-L-2-v2"
fi

# MiniLM-L6 (22M params)
download_model "cross-encoder/ms-marco-MiniLM-L-6-v2" "ms-marco-MiniLM-L-6-v2"

# MiniLM-L12 (33M params)
download_model "cross-encoder/ms-marco-MiniLM-L-12-v2" "ms-marco-MiniLM-L-12-v2"

echo "=== Download Complete ==="
echo
echo "Models available in $MODEL_DIR/:"
ls -la "$MODEL_DIR/"
echo
echo "You can now use these rerankers:"
echo "  --reranker ms-marco-tinybert   (4M params, fastest)"
echo "  --reranker ms-marco-minilm-l6  (22M params, balanced)"
echo "  --reranker ms-marco-minilm-l12 (33M params, most accurate)"
/**
 * Convert a millisecond timeout into whole seconds for user display.
 * Fractional seconds round down (floor), including for negative values.
 * @param {number} milliseconds - Timeout in milliseconds
 * @returns {number} Timeout in seconds
 */
export function msToSeconds(milliseconds) {
  const wholeSeconds = Math.floor(milliseconds / 1000);
  return wholeSeconds;
}
58 | } -------------------------------------------------------------------------------- /src/language/cpp.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for C++ 5 | pub struct CppLanguage; 6 | 7 | impl Default for CppLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl CppLanguage { 14 | pub fn new() -> Self { 15 | CppLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for CppLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_cpp::LANGUAGE.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "cpp" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" 32 | | "declaration" 33 | | "struct_specifier" 34 | | "class_specifier" 35 | | "enum_specifier" 36 | | "namespace_definition" 37 | ) 38 | } 39 | 40 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 41 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 42 | let node_type = node.kind(); 43 | 44 | // C++: Check function_definition nodes with test in the name 45 | if node_type == "function_definition" { 46 | let mut cursor = node.walk(); 47 | for child in node.children(&mut cursor) { 48 | if child.kind() == "function_declarator" { 49 | let mut subcursor = child.walk(); 50 | for subchild in child.children(&mut subcursor) { 51 | if subchild.kind() == "identifier" { 52 | let name = subchild.utf8_text(source).unwrap_or(""); 53 | if name.contains("test") || name.contains("Test") { 54 | if debug_mode { 55 | println!("DEBUG: Test node detected (C++): test function"); 56 | } 57 | return true; 58 | } 59 | } 60 | } 61 | } 62 | } 63 | } 64 | 65 | false 66 | } 67 | } 68 | -------------------------------------------------------------------------------- 
#!/bin/bash

# Comprehensive performance analysis of the parallel BERT reranker.
# Compares sequential vs parallel modes at several document counts.

# Run relative to this script's own location so it works on any machine
# (previously this hardcoded a developer-specific absolute path).
cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1

# The benchmark binary must be built first: cargo build --release
BENCH="./target/release/benchmark"
if [ ! -x "$BENCH" ]; then
    echo "❌ $BENCH not found or not executable."
    echo "   Build it first with: cargo build --release"
    exit 1
fi

echo "🚀 PARALLEL BERT RERANKER - COMPREHENSIVE PERFORMANCE ANALYSIS"
echo "=============================================================="
echo ""

echo "=== CPU CORE DETECTION ==="
echo "System CPU cores: $(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 'unknown')"
echo "Logical processors: $(sysctl -n hw.logicalcpu 2>/dev/null || echo 'unknown')"
echo ""

echo "=== SEQUENTIAL vs PARALLEL COMPARISON ==="
echo ""

echo "📊 Small scale comparison (20 docs):"
"$BENCH" --compare-modes --query "rust async programming" --num-docs 20 --iterations 2

echo ""
echo "📊 Medium scale comparison (50 docs):"
"$BENCH" --compare-modes --query "machine learning neural network" --num-docs 50 --iterations 2

echo ""
echo "📊 Large scale comparison (100 docs):"
"$BENCH" --compare-modes --query "database optimization indexing" --num-docs 100 --iterations 1

echo ""
echo "=== PURE PARALLEL PERFORMANCE ==="
echo ""

echo "🔥 Parallel BERT with auto-detected cores:"
"$BENCH" --parallel --query "search algorithm optimization" --num-docs 60 --iterations 3

echo ""
echo "🔥 Large-scale parallel processing:"
"$BENCH" --parallel --query "distributed systems performance" --num-docs 120 --iterations 1

echo ""
echo "=== PERFORMANCE COMPARISON SUMMARY ==="
echo ""

echo "💡 Original BERT (sequential): ~7-8 docs/second"
echo "🚀 Parallel BERT (multi-core): ~30-40 docs/second"
echo "📈 Demo algorithm (mock): ~80,000+ docs/second"
echo ""
echo "KEY ACHIEVEMENTS:"
echo "✅ 4-6x speedup with CPU parallelization"
echo "✅ Real semantic understanding maintained"
echo "✅ Scales efficiently with CPU cores"
echo "✅ Thread-safe BERT model sharing"
echo "✅ Automatic core detection and optimization"
echo ""
echo "=============================================================="
echo "🎯 PARALLEL BERT RERANKER IMPLEMENTATION COMPLETE!"
echo "=============================================================="
25 | 26 | #––– VALIDATE ARGUMENTS ––––––––––––––––––––––––––––––––––––––––––––––––– 27 | if [[ $# -eq 0 ]]; then 28 | printf '{"decision":"%s","reason":"Error: No command provided to claude-hook-wrapper.sh"}\n' "$FAIL" 29 | exit 1 30 | fi 31 | 32 | #––– JSON ESCAPE FUNCTION –––––––––––––––––––––––––––––––––––––––––––––– 33 | json_escape() { 34 | if command -v jq >/dev/null 2>&1; then 35 | jq -Rs '.' <<<"$1" 36 | else 37 | # Fallback if jq is not available 38 | printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g; s/ /\\t/g' | awk '{gsub(/\r/,"\\r"); gsub(/\n/,"\\n"); printf "%s\\n", $0}' | sed '$ s/\\n$//' 39 | fi 40 | } 41 | 42 | #––– RUN COMMAND –––––––––––––––––––––––––––––––––––––––––––––––––––––––– 43 | # Capture both stdout and stderr 44 | output=$(mktemp) 45 | trap 'rm -f "$output"' EXIT 46 | 47 | # Run the command, capturing all output 48 | if "$@" >"$output" 2>&1; then 49 | # Command succeeded 50 | printf '{"decision":"%s","reason":"✅ %s completed successfully!"}\n' "$PASS" "$1" 51 | else 52 | # Command failed - include the output in the reason 53 | exit_code=$? 54 | reason=$(printf "❌ %s failed with exit code %d!\n\nOutput:\n%s\n\n💡 Please fix the issues above and try again." "$1" "$exit_code" "$(<"$output")") 55 | printf '{"decision":"%s","reason":%s}\n' "$FAIL" "$(json_escape "$reason")" 56 | fi -------------------------------------------------------------------------------- /examples/reranker/test_cross_encoder.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Cross-encoder testing script setup and runner 4 | # This script sets up the Python environment and runs the cross-encoder tests 5 | 6 | set -e 7 | 8 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 9 | cd "$SCRIPT_DIR" 10 | 11 | echo "=== Cross-Encoder Model Testing Setup ===" 12 | echo "Working directory: $SCRIPT_DIR" 13 | 14 | # Check if Python 3 is available 15 | if ! 
command -v python3 &> /dev/null; then 16 | echo "❌ Python 3 is required but not found" 17 | exit 1 18 | fi 19 | 20 | echo "✓ Python 3 found: $(python3 --version)" 21 | 22 | # Check if pip is available 23 | if ! command -v pip3 &> /dev/null; then 24 | echo "❌ pip3 is required but not found" 25 | exit 1 26 | fi 27 | 28 | echo "✓ pip3 found" 29 | 30 | # Install or check requirements 31 | echo "" 32 | echo "Checking Python dependencies..." 33 | 34 | # Function to check if a package is installed 35 | check_package() { 36 | python3 -c "import $1" 2>/dev/null && return 0 || return 1 37 | } 38 | 39 | # Check required packages 40 | REQUIRED_PACKAGES=("torch" "transformers" "numpy") 41 | MISSING_PACKAGES=() 42 | 43 | for package in "${REQUIRED_PACKAGES[@]}"; do 44 | if check_package "$package"; then 45 | echo "✓ $package is installed" 46 | else 47 | echo "❌ $package is missing" 48 | MISSING_PACKAGES+=("$package") 49 | fi 50 | done 51 | 52 | # Check optional package 53 | if check_package "sentence_transformers"; then 54 | echo "✓ sentence-transformers is installed" 55 | else 56 | echo "⚠️ sentence-transformers is missing (optional but recommended)" 57 | MISSING_PACKAGES+=("sentence-transformers") 58 | fi 59 | 60 | # Install missing packages if any 61 | if [ ${#MISSING_PACKAGES[@]} -gt 0 ]; then 62 | echo "" 63 | echo "Installing missing packages..." 
64 | pip3 install "${MISSING_PACKAGES[@]}" 65 | echo "✓ Dependencies installed" 66 | else 67 | echo "✓ All required dependencies are installed" 68 | fi 69 | 70 | echo "" 71 | echo "=== Running Cross-Encoder Tests ===" 72 | echo "" 73 | 74 | # Run the test script 75 | python3 test_cross_encoder.py 76 | 77 | echo "" 78 | echo "=== Test Complete ===" 79 | echo "Check the output above for score comparisons and debugging information" 80 | echo "Results have been saved to cross_encoder_test_results.json" -------------------------------------------------------------------------------- /src/language/php.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for PHP 5 | pub struct PhpLanguage; 6 | 7 | impl Default for PhpLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl PhpLanguage { 14 | pub fn new() -> Self { 15 | PhpLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for PhpLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_php::LANGUAGE_PHP.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "php" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" 32 | | "method_declaration" 33 | | "class_declaration" 34 | | "interface_declaration" 35 | | "trait_declaration" 36 | ) 37 | } 38 | 39 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 40 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 41 | let node_type = node.kind(); 42 | 43 | // PHP: Check method_declaration nodes with test prefix or PHPUnit annotations 44 | if node_type == "method_declaration" { 45 | let mut cursor = node.walk(); 46 | for child in node.children(&mut cursor) { 47 | if child.kind() == "name" { 48 | let name = 
You are a senior engineer focused on software architecture and design. Before jumping on the task, you first analyse the user request in detail and try to provide an elegant and concise solution. If the solution is clear, you can jump to implementation right away; if not, you can ask the user a clarification question by calling the attempt_completion tool with the required details. You are allowed to use the search tool with the allow_tests argument in order to find the tests. When you are reviewing a pull request, or are asked to make suggestions on a PR, you can use the implement tool too.
9 | - Consider scalability, maintainability, and extensibility in your analysis 10 | 11 | During the implementation: 12 | - Avoid implementing special cases 13 | - Do not forget to add the tests 14 | 15 | ## Failure Tag Feature 16 | 17 | When working on GitHub Actions workflows, you can use the failure tag feature to signal critical issues that should prevent code from being merged: 18 | 19 | - Include `` in your response when you detect critical issues like security vulnerabilities, breaking changes without proper documentation, or severe bugs 20 | - The tag will be automatically removed from your comment, but a failure message will be added at the top 21 | - The GitHub check will fail, drawing attention to these critical issues 22 | - Use this feature judiciously - only for issues that truly warrant failing the CI check 23 | 24 | ### Example Usage 25 | 26 | ``` 27 | 28 | 29 | I found a critical security vulnerability in the authentication code that allows SQL injection attacks. This must be fixed before merging. 30 | 31 | ## Security Issues Found 32 | 33 | 1. **SQL Injection in login.js** - User input is directly concatenated into SQL queries 34 | 2. **Missing input validation** - No sanitization of user credentials 35 | 36 | ## Recommendations 37 | - Use parameterized queries 38 | - Add input validation middleware 39 | ``` 40 | 41 | The `` tag will be stripped from the comment, but the GitHub check will fail to prevent merging until the issues are resolved. 
use std::process::Command;

/// Run the release `probe` binary with the given arguments and return its
/// stdout as a String.
///
/// Fails the test immediately (with stderr attached) when the command exits
/// non-zero, instead of letting an empty stdout produce confusing downstream
/// assertion failures.
fn run_probe(args: &[&str]) -> String {
    let output = Command::new("./target/release/probe")
        .args(args)
        .output()
        .expect("Failed to execute probe command");

    assert!(
        output.status.success(),
        "probe exited with {:?}; stderr: {}",
        output.status.code(),
        String::from_utf8_lossy(&output.stderr)
    );

    String::from_utf8(output.stdout).expect("Invalid UTF-8 in output")
}

#[test]
fn test_outline_format_preserves_keywords_in_truncated_arrays() {
    // Outline format on a file known to have large arrays containing keywords.
    let stdout = run_probe(&[
        "search",
        "stemming",
        "./src/search/tokenization.rs",
        "--format",
        "outline",
    ]);

    // The output should contain the keyword "stemming" even in truncated arrays
    assert!(
        stdout.contains("stemming"),
        "Output should contain 'stemming' keyword even in truncated arrays"
    );

    // The output should show truncation with "..."
    assert!(
        stdout.contains("..."),
        "Output should show truncation with ellipsis"
    );

    // The output should have reasonable length (not thousands of lines like before)
    let line_count = stdout.lines().count();
    assert!(
        line_count < 200,
        "Output should be truncated to reasonable size, got {line_count} lines"
    );
}

#[test]
fn test_outline_format_highlights_keywords_in_comments() {
    // Keywords should be highlighted in function signatures and comments.
    let stdout = run_probe(&[
        "search",
        "stem",
        "./src/search/tokenization.rs",
        "--format",
        "outline",
    ]);

    // Should contain the function name with highlighting (though we can't test ANSI codes easily)
    assert!(
        stdout.contains("tokenize_and_stem"),
        "Should contain function name with stem keyword"
    );

    // Should contain comment lines with the keyword
    assert!(
        stdout.contains("apply stemming"),
        "Should contain comment with stemming keyword"
    );
}
get_extension(&self) -> &'static str { 25 | "java" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "method_declaration" 32 | | "class_declaration" 33 | | "interface_declaration" 34 | | "enum_declaration" 35 | | "constructor_declaration" 36 | | "field_declaration" 37 | | "variable_declaration" 38 | | "block" 39 | | "static_initializer" 40 | ) 41 | } 42 | 43 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 44 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 45 | let node_type = node.kind(); 46 | 47 | // Java: Check method_declaration nodes with @Test annotation 48 | if node_type == "method_declaration" { 49 | let mut cursor = node.walk(); 50 | for child in node.children(&mut cursor) { 51 | if child.kind() == "modifiers" { 52 | let mut subcursor = child.walk(); 53 | for annotation in child.children(&mut subcursor) { 54 | if annotation.kind() == "annotation" { 55 | let annotation_text = annotation.utf8_text(source).unwrap_or(""); 56 | if annotation_text.contains("@Test") { 57 | if debug_mode { 58 | println!("DEBUG: Test node detected (Java): @Test method"); 59 | } 60 | return true; 61 | } 62 | } 63 | } 64 | } 65 | } 66 | } 67 | 68 | false 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/language/ruby.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for Ruby 5 | pub struct RubyLanguage; 6 | 7 | impl Default for RubyLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl RubyLanguage { 14 | pub fn new() -> Self { 15 | RubyLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for RubyLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_ruby::LANGUAGE.into() 22 | } 23 | 24 | fn 
get_extension(&self) -> &'static str { 25 | "rb" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "method" | "class" | "module" | "singleton_method" 32 | ) 33 | } 34 | 35 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 36 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 37 | let node_type = node.kind(); 38 | 39 | // Ruby: Check method nodes with test_ prefix or describe/it blocks 40 | if node_type == "method" { 41 | let mut cursor = node.walk(); 42 | for child in node.children(&mut cursor) { 43 | if child.kind() == "identifier" { 44 | let name = child.utf8_text(source).unwrap_or(""); 45 | if name.starts_with("test_") { 46 | if debug_mode { 47 | println!("DEBUG: Test node detected (Ruby): test_ method"); 48 | } 49 | return true; 50 | } 51 | } 52 | } 53 | } else if node_type == "call" { 54 | let mut cursor = node.walk(); 55 | for child in node.children(&mut cursor) { 56 | if child.kind() == "identifier" { 57 | let name = child.utf8_text(source).unwrap_or(""); 58 | if name == "describe" || name == "it" || name == "context" || name == "specify" 59 | { 60 | if debug_mode { 61 | println!("DEBUG: Test node detected (Ruby): {name} block"); 62 | } 63 | return true; 64 | } 65 | } 66 | } 67 | } 68 | 69 | false 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust specific ignores 2 | /target/ 3 | **/*.rs.bk 4 | *.pdb 5 | Cargo.lock 6 | .vscode 7 | # Uncomment the line below if this is a library (keep it commented if it's an application) 8 | # Cargo.lock 9 | 10 | # JSON files 11 | *.json 12 | !Cargo.json 13 | !package.json 14 | !tsconfig.json 15 | 16 | # YAML files 17 | *.yml 18 | *.yaml 19 | 20 | # Node.js specific ignores 21 | node_modules 22 | npm-debug.log 23 | yarn-debug.log 24 | yarn-error.log 25 | .pnpm-debug.log 26 | .npm 27 | 
.yarn/cache 28 | .yarn/unplugged 29 | .yarn/build-state.yml 30 | .yarn/install-state.gz 31 | .pnp.* 32 | package-lock.json 33 | # Uncomment the line below if you want to include package-lock.json in version control 34 | # !package-lock.json 35 | 36 | # Build outputs 37 | /dist/ 38 | /build/ 39 | npm/build/ 40 | npm/cjs/ 41 | /out/ 42 | /.next/ 43 | /.nuxt/ 44 | /.output/ 45 | 46 | # Environment variables 47 | .env 48 | .env.local 49 | .env.development.local 50 | .env.test.local 51 | .env.production.local 52 | 53 | # Logs 54 | logs 55 | *.log 56 | *.jsonl 57 | npm-debug.log* 58 | yarn-debug.log* 59 | yarn-error.log* 60 | pnpm-debug.log* 61 | lerna-debug.log* 62 | 63 | # Editor directories and files 64 | .idea/ 65 | .vscode/* 66 | !.vscode/extensions.json 67 | !.vscode/settings.json 68 | !.vscode/tasks.json 69 | !.vscode/launch.json 70 | *.suo 71 | *.ntvs* 72 | *.njsproj 73 | *.sln 74 | *.sw? 75 | .DS_Store 76 | .AppleDouble 77 | .LSOverride 78 | Thumbs.db 79 | ehthumbs.db 80 | Desktop.ini 81 | $RECYCLE.BIN/ 82 | 83 | # Testing 84 | /coverage 85 | .nyc_output 86 | npm/coverage/ 87 | **/coverage/ 88 | .jest-cache/ 89 | **/.jest-cache/ 90 | test-results/ 91 | junit.xml 92 | *.lcov 93 | 94 | # Temporary files 95 | *.tmp 96 | *.temp 97 | .cache/ 98 | .parcel-cache/ 99 | .eslintcache 100 | .stylelintcache 101 | 102 | # Rust analyzer 103 | rust-project.json 104 | 105 | # Debug files 106 | *.stackdump 107 | 108 | # Protocol Buffer files 109 | *.proto 110 | *_pb2.py 111 | *.pb.css 112 | *.pb.h 113 | *.pb.* 114 | 115 | 116 | .vitepress 117 | .aider* 118 | 119 | # Chat debug files 120 | examples/chat/probe-debug*.txt 121 | 122 | # BERT model files (too large for git) 123 | examples/reranker/models/*/pytorch_model.bin 124 | examples/reranker/models/*/model.safetensors 125 | examples/reranker/models/*/vocab.txt 126 | examples/reranker/cross_encoder_test_results.json 127 | *.tgz 128 | 129 | # Vow - AI accountability files 130 | .vow* 131 | 
-------------------------------------------------------------------------------- /npm/test-codex-e2e.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * End-to-end test for Codex integration 5 | * Run with: node test-codex-e2e.js 6 | */ 7 | 8 | import { ProbeAgent } from './src/agent/ProbeAgent.js'; 9 | 10 | console.log('🧪 Codex Integration E2E Test\n'); 11 | console.log('Testing basic query with Codex engine...\n'); 12 | 13 | async function main() { 14 | let agent; 15 | 16 | try { 17 | // Create agent with Codex provider (use default model, not gpt-4o) 18 | console.log('1️⃣ Creating ProbeAgent with provider: codex (using default model)'); 19 | agent = new ProbeAgent({ 20 | provider: 'codex', 21 | model: null, // Don't specify model, let Codex use its default 22 | allowedFolders: [process.cwd()], 23 | debug: true 24 | }); 25 | 26 | console.log('\n2️⃣ Initializing agent...'); 27 | await agent.initialize(); 28 | 29 | console.log('\n✅ Agent initialized successfully!'); 30 | console.log(` Provider: ${agent.clientApiProvider}`); 31 | console.log(` API Type: ${agent.apiType}`); 32 | console.log(` Model: ${agent.model}`); 33 | 34 | // Test simple query 35 | console.log('\n3️⃣ Testing simple query: "What is 2 + 2?"'); 36 | console.log(' (This should trigger Codex CLI)\n'); 37 | 38 | const response = await agent.answer('What is 2 + 2?'); 39 | 40 | console.log('\n✅ Query completed!'); 41 | console.log('\n📝 Response:'); 42 | console.log('─'.repeat(60)); 43 | console.log(response); 44 | console.log('─'.repeat(60)); 45 | 46 | // Clean up 47 | console.log('\n4️⃣ Cleaning up...'); 48 | if (agent.engine && agent.engine.close) { 49 | await agent.engine.close(); 50 | } 51 | 52 | console.log('\n✅ All tests passed! 
🎉\n'); 53 | process.exit(0); 54 | 55 | } catch (error) { 56 | console.error('\n❌ Test failed:', error.message); 57 | console.error('\nStack trace:'); 58 | console.error(error.stack); 59 | 60 | // Clean up on error 61 | if (agent?.engine?.close) { 62 | try { 63 | await agent.engine.close(); 64 | } catch (cleanupError) { 65 | // Ignore cleanup errors 66 | } 67 | } 68 | 69 | console.log('\n💡 Common issues:'); 70 | console.log(' - Make sure Codex CLI is installed: https://openai.com/codex'); 71 | console.log(' - Check that you can run: codex --version'); 72 | console.log(' - Ensure you have an active Codex session'); 73 | 74 | process.exit(1); 75 | } 76 | } 77 | 78 | main(); 79 | -------------------------------------------------------------------------------- /npm/tests/unit/extract-content.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Test for extract() function with content parameter 3 | * This test verifies the fix for the process.env.DEBUG bug 4 | */ 5 | 6 | import { extract } from '../../src/index.js'; 7 | import path from 'path'; 8 | 9 | describe('extract() with content parameter', () => { 10 | // Sample diff content for testing 11 | const diffContent = `diff --git a/src/main.rs b/src/main.rs 12 | index 123..456 13 | --- a/src/main.rs 14 | +++ b/src/main.rs 15 | @@ -10,3 +10,4 @@ 16 | fn main() { 17 | - println!("old"); 18 | + println!("new"); 19 | }`; 20 | 21 | test('should process diff content without crashing', async () => { 22 | // This test verifies that the extract function doesn't crash 23 | // when accessing process.env.DEBUG 24 | const result = await extract({ 25 | content: diffContent, 26 | format: 'outline-xml', 27 | }); 28 | 29 | // Should return a result (string for outline-xml format) 30 | expect(result).toBeDefined(); 31 | expect(typeof result).toBe('string'); 32 | expect(result.length).toBeGreaterThan(0); 33 | }); 34 | 35 | test('should handle DEBUG environment variable correctly', async () 
=> { 36 | // Test with DEBUG enabled 37 | const originalDebug = process.env.DEBUG; 38 | process.env.DEBUG = '1'; 39 | 40 | try { 41 | const result = await extract({ 42 | content: diffContent, 43 | format: 'outline-xml', 44 | }); 45 | 46 | expect(result).toBeDefined(); 47 | } finally { 48 | // Restore original DEBUG value 49 | if (originalDebug === undefined) { 50 | delete process.env.DEBUG; 51 | } else { 52 | process.env.DEBUG = originalDebug; 53 | } 54 | } 55 | }); 56 | 57 | test('should work with outline-xml format', async () => { 58 | const result = await extract({ 59 | content: diffContent, 60 | format: 'outline-xml', 61 | }); 62 | 63 | expect(result).toBeDefined(); 64 | expect(typeof result).toBe('string'); 65 | expect(result.length).toBeGreaterThan(0); 66 | }); 67 | 68 | test('should handle errors gracefully', async () => { 69 | // Test with invalid content 70 | try { 71 | await extract({ 72 | content: 'invalid diff content', 73 | format: 'outline-xml', 74 | }); 75 | // If it succeeds, that's also acceptable 76 | } catch (error) { 77 | // Should throw a proper Error object, not a TypeError about undefined 78 | expect(error).toBeInstanceOf(Error); 79 | expect(error.message).not.toContain('Cannot read properties of undefined'); 80 | expect(error.message).not.toContain('process2'); 81 | } 82 | }); 83 | }); 84 | -------------------------------------------------------------------------------- /examples/chat/cancelRequest.js: -------------------------------------------------------------------------------- 1 | // Map to store active requests by session ID 2 | const activeRequests = new Map(); 3 | 4 | /** 5 | * Register a request as active 6 | * @param {string} sessionId - The session ID 7 | * @param {Object} requestData - Data about the request (can include abort functions, etc.) 
8 | */ 9 | export function registerRequest(sessionId, requestData) { 10 | if (!sessionId) { 11 | console.warn('Attempted to register request without session ID'); 12 | return; 13 | } 14 | 15 | console.log(`Registering request for session: ${sessionId}`); 16 | activeRequests.set(sessionId, requestData); 17 | } 18 | 19 | /** 20 | * Cancel a request by session ID 21 | * @param {string} sessionId - The session ID 22 | * @returns {boolean} - Whether the cancellation was successful 23 | */ 24 | export function cancelRequest(sessionId) { 25 | if (!sessionId) { 26 | console.warn('Attempted to cancel request without session ID'); 27 | return false; 28 | } 29 | 30 | const requestData = activeRequests.get(sessionId); 31 | if (!requestData) { 32 | console.warn(`No active request found for session: ${sessionId}`); 33 | return false; 34 | } 35 | 36 | console.log(`Cancelling request for session: ${sessionId}`); 37 | 38 | // Call the abort function if it exists 39 | if (typeof requestData.abort === 'function') { 40 | try { 41 | requestData.abort(); 42 | console.log(`Successfully aborted request for session: ${sessionId}`); 43 | } catch (error) { 44 | console.error(`Error aborting request for session ${sessionId}:`, error); 45 | } 46 | } 47 | 48 | // Remove the request from the active requests map 49 | activeRequests.delete(sessionId); 50 | return true; 51 | } 52 | 53 | /** 54 | * Check if a request is active 55 | * @param {string} sessionId - The session ID 56 | * @returns {boolean} - Whether the request is active 57 | */ 58 | export function isRequestActive(sessionId) { 59 | return activeRequests.has(sessionId); 60 | } 61 | 62 | /** 63 | * Get all active requests 64 | * @returns {Map} - Map of all active requests 65 | */ 66 | export function getActiveRequests() { 67 | return activeRequests; 68 | } 69 | 70 | /** 71 | * Clear a request from the active requests map 72 | * @param {string} sessionId - The session ID 73 | */ 74 | export function clearRequest(sessionId) { 75 | if 
(!sessionId) { 76 | console.warn('Attempted to clear request without session ID'); 77 | return; 78 | } 79 | 80 | if (activeRequests.has(sessionId)) { 81 | console.log(`Clearing request for session: ${sessionId}`); 82 | activeRequests.delete(sessionId); 83 | } 84 | } -------------------------------------------------------------------------------- /src/search/limits.rs: -------------------------------------------------------------------------------- 1 | use probe_code::models::{LimitedSearchResults, SearchResult}; 2 | use probe_code::search::token_utils::count_tokens; 3 | 4 | /// Helper function to apply limits to search results 5 | pub fn apply_limits( 6 | results: Vec, 7 | max_results: Option, 8 | max_bytes: Option, 9 | max_tokens: Option, 10 | ) -> LimitedSearchResults { 11 | // If no limits are specified, return all results 12 | if max_results.is_none() && max_bytes.is_none() && max_tokens.is_none() { 13 | return LimitedSearchResults { 14 | results, 15 | truncated: false, 16 | total_results: results.len(), 17 | total_bytes: results.iter().map(|r| r.content.len()).sum(), 18 | total_tokens: results.iter().map(|r| count_tokens(&r.content)).sum(), 19 | }; 20 | } 21 | 22 | let mut limited_results = Vec::new(); 23 | let mut current_bytes = 0; 24 | let mut current_tokens = 0; 25 | let mut truncated = false; 26 | 27 | // Calculate total bytes and tokens for all results 28 | let total_bytes = results.iter().map(|r| r.content.len()).sum(); 29 | let total_tokens = results.iter().map(|r| count_tokens(&r.content)).sum(); 30 | 31 | // Apply limits 32 | for result in results { 33 | // Check if we've reached the maximum number of results 34 | if let Some(max) = max_results { 35 | if limited_results.len() >= max { 36 | truncated = true; 37 | break; 38 | } 39 | } 40 | 41 | // Check if adding this result would exceed the maximum bytes 42 | if let Some(max) = max_bytes { 43 | if current_bytes + result.content.len() > max { 44 | truncated = true; 45 | break; 46 | } 47 | } 48 | 49 | 
// Check if adding this result would exceed the maximum tokens 50 | if let Some(max) = max_tokens { 51 | let result_tokens = count_tokens(&result.content); 52 | if current_tokens + result_tokens > max { 53 | truncated = true; 54 | break; 55 | } 56 | current_tokens += result_tokens; 57 | } 58 | 59 | // Add the result to the limited results 60 | current_bytes += result.content.len(); 61 | limited_results.push(result); 62 | } 63 | 64 | LimitedSearchResults { 65 | results: limited_results, 66 | truncated, 67 | total_results: limited_results.len(), 68 | total_bytes: current_bytes, 69 | total_tokens: current_tokens, 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /examples/chat/auth.js: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | 3 | /** 4 | * Basic authentication middleware 5 | * Checks for valid username and password in the Authorization header 6 | * Can be enabled/disabled via environment variables 7 | */ 8 | export function authMiddleware(req, res, next) { 9 | // Check if authentication is enabled 10 | const AUTH_ENABLED = process.env.AUTH_ENABLED === '1'; 11 | 12 | // If authentication is not enabled, skip authentication check 13 | if (!AUTH_ENABLED) { 14 | return next(req, res); 15 | } 16 | 17 | // Get configured username and password from environment variables 18 | const AUTH_USERNAME = process.env.AUTH_USERNAME || 'admin'; 19 | const AUTH_PASSWORD = process.env.AUTH_PASSWORD || 'password'; 20 | 21 | // Check if request has Authorization header 22 | const authHeader = req.headers.authorization; 23 | 24 | if (!authHeader) { 25 | // No Authorization header, return 401 Unauthorized 26 | res.writeHead(401, { 27 | 'Content-Type': 'text/plain', 28 | 'WWW-Authenticate': 'Basic realm="Probe Code Search"' 29 | }); 30 | res.end('Authentication required'); 31 | return; 32 | } 33 | 34 | // Parse Authorization header 35 | try { 36 | // Basic auth format: 
"Basic base64(username:password)" 37 | const authParts = authHeader.split(' '); 38 | if (authParts.length !== 2 || authParts[0] !== 'Basic') { 39 | throw new Error('Invalid Authorization header format'); 40 | } 41 | 42 | // Decode base64 credentials 43 | const credentials = Buffer.from(authParts[1], 'base64').toString('utf-8'); 44 | const [username, password] = credentials.split(':'); 45 | 46 | // Check if credentials match 47 | if (username === AUTH_USERNAME && password === AUTH_PASSWORD) { 48 | // Authentication successful, proceed to next middleware 49 | return next(req, res); 50 | } else { 51 | // Invalid credentials, return 401 Unauthorized 52 | res.writeHead(401, { 53 | 'Content-Type': 'text/plain', 54 | 'WWW-Authenticate': 'Basic realm="Probe Code Search"' 55 | }); 56 | res.end('Invalid credentials'); 57 | return; 58 | } 59 | } catch (error) { 60 | // Error parsing Authorization header, return 400 Bad Request 61 | res.writeHead(400, { 'Content-Type': 'text/plain' }); 62 | res.end('Invalid Authorization header'); 63 | return; 64 | } 65 | } 66 | 67 | /** 68 | * Apply authentication middleware to a request handler 69 | * @param {Function} handler - The request handler function 70 | * @returns {Function} - A new handler function with authentication 71 | */ 72 | export function withAuth(handler) { 73 | return (req, res) => { 74 | authMiddleware(req, res, () => handler(req, res)); 75 | }; 76 | } -------------------------------------------------------------------------------- /site/.vitepress/theme/home.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --vp-home-hero-name-color: var(--vp-c-brand); 3 | --vp-home-hero-text-color: var(--vp-c-text-1); 4 | --vp-home-hero-tagline-color: var(--vp-c-text-2); 5 | --home-bg-overlay: rgba(255, 255, 255, 0.9); 6 | --home-feature-bg: rgba(255, 255, 255, 0.9); 7 | --home-border-color: rgba(60, 60, 60, 0.12); 8 | } 9 | 10 | .dark { 11 | --home-bg-overlay: rgba(26, 26, 26, 0.9); 
12 | --home-feature-bg: rgba(26, 26, 26, 0.9); 13 | --home-border-color: rgba(200, 200, 200, 0.12); 14 | } 15 | 16 | .VPHome { 17 | position: relative; 18 | z-index: 1; 19 | } 20 | 21 | .VPHome .VPHomeHero { 22 | background: transparent; 23 | position: relative; 24 | z-index: 2; 25 | } 26 | 27 | .VPHome .VPHomeHero .image { 28 | background: transparent; 29 | margin-top: 2rem; 30 | } 31 | 32 | .VPHome .VPHomeHero .image img { 33 | max-width: 300px; 34 | height: auto; 35 | margin: 0 auto; 36 | } 37 | 38 | .VPHome .VPFeatures { 39 | background: var(--home-feature-bg); 40 | backdrop-filter: blur(10px); 41 | border-radius: 12px; 42 | padding: 2rem; 43 | margin: 2rem auto; 44 | max-width: 1200px; 45 | position: relative; 46 | z-index: 2; 47 | border: 1px solid var(--home-border-color); 48 | } 49 | 50 | .VPHome .VPFeatures .VPFeature { 51 | background: transparent; 52 | } 53 | 54 | .main-content { 55 | position: relative; 56 | z-index: 2; 57 | max-width: 1200px; 58 | margin: 0 auto; 59 | padding: 2rem; 60 | /* background: var(--home-bg-overlay); */ 61 | /* backdrop-filter: blur(10px); */ 62 | /* border-radius: 12px; */ 63 | /* border: 1px solid var(--home-border-color); */ 64 | } 65 | 66 | @media (max-width: 768px) { 67 | .main-content { 68 | padding: 1rem 0.5rem; 69 | } 70 | } 71 | 72 | @media (max-width: 640px) { 73 | .main-content { 74 | padding: 0.75rem 0.25rem; 75 | } 76 | } 77 | 78 | .VPFeatures { 79 | margin-bottom: 4rem !important; 80 | } 81 | 82 | /* Add styles for FeatureSection */ 83 | .FeatureSection { 84 | display: grid; 85 | grid-template-columns: 1fr 1fr; 86 | gap: 2rem; 87 | margin-bottom: 4rem; 88 | align-items: start; 89 | } 90 | 91 | .FeatureSection :deep(h2) { 92 | margin-top: 0; 93 | color: var(--text-primary); 94 | } 95 | 96 | .FeatureSection :deep(pre) { 97 | margin: 1rem 0; 98 | background: var(--bg-code-block); 99 | border: 1px solid var(--border-color); 100 | } 101 | 102 | .FeatureSection :deep(p) { 103 | margin: 1rem 0; 104 | color: 
var(--text-secondary); 105 | } 106 | 107 | @media (max-width: 768px) { 108 | .FeatureSection { 109 | grid-template-columns: 1fr; 110 | } 111 | } -------------------------------------------------------------------------------- /.roomodes: -------------------------------------------------------------------------------- 1 | { 2 | "customModes": [ 3 | { 4 | "slug": "ask-probe", 5 | "name": "Ask Probe", 6 | "roleDefinition": "You intelligence assistant for developers, product managers, QA engineers, and documentation writers, designed to search and analyze multi-language codebases efficiently. Instead of standard file search and file read tools you should use Probe Agent tool, and forward it all the questions about the codebase.", 7 | "customInstructions": "Where relevant, add mermaid diagrams.", 8 | "groups": [ 9 | "read", 10 | "mcp" 11 | ], 12 | "source": "project" 13 | }, 14 | { 15 | "slug": "doc-writer", 16 | "name": "Documentation Writer", 17 | "roleDefinition": "You are Roo, a technical documentation specialist focused on creating and maintaining high-quality documentation for the Probe code search tool. Your expertise includes:\n- Writing clear, concise, and accurate technical documentation\n- Organizing information in a logical and user-friendly manner\n- Maintaining consistent style and formatting across documentation\n- Creating examples that effectively demonstrate features\n- Ensuring documentation is up-to-date with the latest features and changes\n- Understanding technical concepts and explaining them in accessible language", 18 | "customInstructions": "When updating documentation:\n\n1. **Maintain Consistency**:\n - Follow existing formatting patterns and style conventions\n - Use consistent heading levels (# for main titles, ## for sections, etc.)\n - Maintain the existing frontmatter structure in files that have it\n\n2. 
**Content Guidelines**:\n - Be concise but thorough - aim for clarity above all\n - Include practical examples where appropriate\n - Use code blocks with proper syntax highlighting\n - Structure content with clear headings and bullet points\n - Focus on user benefits, not just feature descriptions\n\n3. **Technical Accuracy**:\n - Ensure command examples are correct and tested\n - Verify that feature descriptions match actual implementation\n - Update version numbers and compatibility information as needed\n - Cross-reference related documentation sections\n\n4. **Special Components**:\n - Use for code examples\n - Use for CLI commands\n - Maintain proper frontmatter for pages that use it\n\n5. **Navigation**:\n - Ensure proper linking between related documentation pages\n - Update navigation references when adding new content\n\n6. Website is located in ./site/ folder, and use vitepress", 19 | "groups": [ 20 | "read", 21 | "browser", 22 | "edit", 23 | "command" 24 | ], 25 | "source": "project" 26 | } 27 | ] 28 | } -------------------------------------------------------------------------------- /site/.vitepress/theme/components/CodeEditor.vue: -------------------------------------------------------------------------------- 1 | 23 | 24 | 40 | 41 | -------------------------------------------------------------------------------- /examples/reranker/DEBUG_OUTPUT_ANALYSIS.md: -------------------------------------------------------------------------------- 1 | # Debug Output Analysis 2 | 3 | Based on the debug output, here's exactly what's happening in our Rust implementation: 4 | 5 | ## 1. Input to score_pair() 6 | ``` 7 | Query: 'test question' 8 | Document: '// Filename: ./mcp-agent/src/agent.js\n// AI agent implementation\nimport...' 9 | ``` 10 | 11 | ## 2. 
Tokenization (`encode_pair`) 12 | - **Token IDs**: `[101, 3231, 3160, 102, 1013, 1013, 5371, 18442, 1024, ...]` 13 | - 101 = [CLS] 14 | - 3231, 3160 = "test question" 15 | - 102 = [SEP] 16 | - Rest = document tokens 17 | 18 | - **Token Type IDs**: `[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...]` 19 | - First 4 tokens (including [CLS] and [SEP]) = 0 (query segment) 20 | - Remaining tokens = 1 (document segment) 21 | - ✅ This is CORRECT! 22 | 23 | - **Structure**: `[CLS] test question [SEP] // Filename: ./mcp-agent/src/agent.js ...` 24 | 25 | ## 3. Model Input Tensors 26 | - **input_ids**: Shape [1, 512] - padded to max length 27 | - **attention_mask**: Shape [1, 512] - 1s for real tokens, 0s for padding 28 | - **token_type_ids**: Shape [1, 512] - 0s for query, 1s for document 29 | 30 | ## 4. BERT Processing 31 | - **CLS output**: Shape [1, 128] (hidden size = 128 for TinyBERT) 32 | - **CLS values**: `[-0.041968495, -0.4378377, 0.58510137, 1.540222, ...]` 33 | - These are the contextualized embeddings for the [CLS] token 34 | 35 | ## 5. Classifier Output 36 | - **Logits**: Shape [1, 1] - single score 37 | - **Raw score**: 0.833216 (for this example) 38 | 39 | ## Key Observations 40 | 41 | 1. **Tokenization is correct**: Using `encode_pair()` properly generates: 42 | - Correct special tokens ([CLS], [SEP]) 43 | - Correct token type IDs (0 for query, 1 for document) 44 | 45 | 2. **Model inputs are correct**: All tensors have the right shape and values 46 | 47 | 3. **BERT is processing correctly**: Getting proper hidden states 48 | 49 | 4. **Scores are reasonable**: Raw logits in expected range 50 | 51 | ## The Real Issue 52 | 53 | The implementation is correct. The problem is that TinyBERT (4M parameters) produces very similar scores for different queries: 54 | - "test question" → 0.833216 55 | - "how does authentication work" → ~0.85-0.88 (from earlier tests) 56 | 57 | The model just isn't discriminating well between relevant and irrelevant queries because it's too small. 
58 | 59 | ## To Verify Further 60 | 61 | Add this temporary debug to see exact token-by-token breakdown: 62 | ```rust 63 | // After encoding 64 | for (i, (token_id, type_id)) in encoding.get_ids().iter() 65 | .zip(encoding.get_type_ids().iter()) 66 | .enumerate() 67 | .take(20) { 68 | let token_text = self.tokenizer.decode(&[*token_id], false).unwrap_or_default(); 69 | println!(" [{}] '{}' (ID: {}, Type: {})", i, token_text, token_id, type_id); 70 | } 71 | ``` -------------------------------------------------------------------------------- /npm/tests/unit/mermaidInfiniteLoopFix.test.js: -------------------------------------------------------------------------------- 1 | import { jest, beforeEach, describe, it, expect } from '@jest/globals'; 2 | import { validateMermaidDiagram, validateAndFixMermaidResponse, MermaidFixingAgent } from '../../src/agent/schemaUtils.js'; 3 | 4 | describe('Mermaid Infinite Loop Fix', () => { 5 | describe('Node label quote handling', () => { 6 | it('should validate that HTML entities work in Mermaid diagrams', async () => { 7 | const diagramWithEntities = `graph TD 8 | A["Process "data" file"] 9 | B["Handle 'special' case"] 10 | C{"Check "status""}`; 11 | 12 | const validation = await validateMermaidDiagram(diagramWithEntities); 13 | 14 | // HTML entities should not trigger single quote validation errors 15 | if (!validation.isValid) { 16 | expect(validation.error).not.toMatch(/Single quotes in node label/); 17 | expect(validation.error).not.toMatch(/got PS/); 18 | } 19 | }); 20 | }); 21 | 22 | describe('Diamond node quote handling', () => { 23 | }); 24 | 25 | describe('MermaidFixingAgent should not pass schema to avoid infinite loops', () => { 26 | it('should call agent.answer without schema parameter', async () => { 27 | // Create a mock ProbeAgent 28 | const mockAgent = { 29 | answer: jest.fn().mockResolvedValue('```mermaid\ngraph TD\n A --> B\n```') 30 | }; 31 | 32 | // Create MermaidFixingAgent and inject mock 33 | const fixer = new 
MermaidFixingAgent({ debug: false }); 34 | await fixer.initializeAgent(); 35 | fixer.agent = mockAgent; 36 | 37 | // Call fixMermaidDiagram 38 | const brokenDiagram = 'graph TD\n A["broken (syntax"]'; 39 | await fixer.fixMermaidDiagram(brokenDiagram, ['line 1: unclosed bracket'], {}); 40 | 41 | // Verify that answer was called without schema 42 | expect(mockAgent.answer).toHaveBeenCalled(); 43 | const callArgs = mockAgent.answer.mock.calls[0]; 44 | expect(callArgs[0]).toContain('Analyze and fix'); // prompt 45 | expect(callArgs[1]).toEqual([]); // messages array 46 | 47 | // Critical: verify no schema in options (either no 3rd arg or 3rd arg has no schema) 48 | if (callArgs.length >= 3) { 49 | expect(callArgs[2]).not.toHaveProperty('schema'); 50 | } 51 | }); 52 | 53 | it('should initialize ProbeAgent with maxIterations set to 10', async () => { 54 | // Create MermaidFixingAgent 55 | const fixer = new MermaidFixingAgent({ debug: false }); 56 | 57 | // Initialize the agent 58 | const agent = await fixer.initializeAgent(); 59 | 60 | // Verify maxIterations is set to 10 (increased from 2 to handle complex diagrams) 61 | expect(agent.maxIterations).toBe(10); 62 | }); 63 | }); 64 | }); 65 | -------------------------------------------------------------------------------- /src/simd_test.rs: -------------------------------------------------------------------------------- 1 | use crate::simd_ranking::SparseVector; 2 | use ahash::AHashMap as HashMap; 3 | use simsimd::SpatialSimilarity; 4 | 5 | pub fn test_simd_implementation() { 6 | println!("Testing SIMD implementation..."); 7 | 8 | // Test 1: Direct SimSIMD dot product 9 | let a = vec![1.0f32, 2.0, 3.0]; 10 | let b = vec![4.0f32, 5.0, 6.0]; 11 | let expected_dot = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 * 6.0; // = 4 + 10 + 18 = 32 12 | 13 | println!("Testing direct SimSIMD dot product:"); 14 | println!("a = {a:?}, b = {b:?}"); 15 | println!("Expected dot product: {expected_dot}"); 16 | 17 | if let Some(simd_dot) = f32::dot(&a, &b) { 
18 | println!("SimSIMD dot product: {simd_dot}"); 19 | println!( 20 | "SimSIMD is working: {}", 21 | (simd_dot - expected_dot).abs() < 0.001 22 | ); 23 | } else { 24 | println!("SimSIMD dot product FAILED!"); 25 | } 26 | 27 | // Test 2: Sparse vector operations 28 | let mut tf_map1 = HashMap::new(); 29 | tf_map1.insert(0u8, 1); 30 | tf_map1.insert(1u8, 2); 31 | tf_map1.insert(2u8, 3); 32 | 33 | let mut tf_map2 = HashMap::new(); 34 | tf_map2.insert(1u8, 4); 35 | tf_map2.insert(2u8, 5); 36 | tf_map2.insert(3u8, 6); 37 | 38 | let sparse1 = SparseVector::from_tf_map(&tf_map1); 39 | let sparse2 = SparseVector::from_tf_map(&tf_map2); 40 | 41 | println!("\nTesting sparse vectors:"); 42 | println!( 43 | "Vector 1: indices={:?}, values={:?}", 44 | sparse1.indices, sparse1.values 45 | ); 46 | println!( 47 | "Vector 2: indices={:?}, values={:?}", 48 | sparse2.indices, sparse2.values 49 | ); 50 | 51 | // Test intersection 52 | let intersection = sparse1.intersect_indices(&sparse2); 53 | println!("Intersection: {intersection:?}"); 54 | 55 | // Test dot product (should be 2*4 + 3*5 = 8 + 15 = 23) 56 | let dot_product = sparse1.dot_product(&sparse2); 57 | println!("Sparse SIMD dot product: {dot_product}"); 58 | 59 | // Test manual calculation 60 | let manual_dot = sparse1.manual_dot_product(&sparse2); 61 | println!("Manual dot product: {manual_dot}"); 62 | 63 | // Test optimized intersection 64 | let (vals1, vals2) = sparse1.intersect_with_values(&sparse2); 65 | println!("Intersected values: {vals1:?} • {vals2:?}"); 66 | 67 | if let Some(direct_simd) = f32::dot(&vals1, &vals2) { 68 | println!("Direct SIMD on intersected values: {direct_simd}"); 69 | } 70 | 71 | assert_eq!(intersection, vec![1, 2]); 72 | assert_eq!(manual_dot, 23.0); 73 | assert!( 74 | (dot_product - 23.0).abs() < 0.001, 75 | "SIMD dot product should be 23.0, got {dot_product}" 76 | ); 77 | 78 | println!("SIMD test completed successfully!"); 79 | } 80 | 
-------------------------------------------------------------------------------- /site/.vitepress/theme/components/Feature.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 31 | 32 | -------------------------------------------------------------------------------- /npm/src/agent/engines/enhanced-vercel.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Enhanced Vercel AI SDK Engine with proper tool and prompt support 3 | */ 4 | 5 | import { streamText } from 'ai'; 6 | 7 | /** 8 | * Create an enhanced Vercel AI SDK engine with full tool support 9 | * @param {Object} agent - The ProbeAgent instance 10 | * @returns {Object} Engine interface 11 | */ 12 | export function createEnhancedVercelEngine(agent) { 13 | return { 14 | /** 15 | * Query the model using existing Vercel AI SDK implementation 16 | * @param {string} prompt - The prompt to send 17 | * @param {Object} options - Additional options 18 | * @returns {AsyncIterable} Response stream 19 | */ 20 | async *query(prompt, options = {}) { 21 | // Get the system message with tools embedded (existing behavior) 22 | const systemMessage = await agent.getSystemMessage(); 23 | 24 | // Build messages array with system prompt 25 | const messages = [ 26 | { role: 'system', content: systemMessage }, 27 | ...agent.history, 28 | { role: 'user', content: prompt } 29 | ]; 30 | 31 | // Use existing streamText with retry and fallback 32 | const result = await agent.streamTextWithRetryAndFallback({ 33 | model: agent.provider(agent.model), 34 | messages, 35 | maxTokens: options.maxTokens || agent.maxResponseTokens, 36 | temperature: options.temperature || 0.3, 37 | // Note: Vercel AI SDK doesn't use structured tools for XML format 38 | // The tools are embedded in the system prompt 39 | experimental_telemetry: options.telemetry 40 | }); 41 | 42 | // Stream the response 43 | let fullContent = ''; 44 | for await (const chunk of 
result.textStream) { 45 | fullContent += chunk; 46 | yield { type: 'text', content: chunk }; 47 | } 48 | 49 | // Parse XML tool calls from the response if any 50 | // This maintains compatibility with existing XML tool format 51 | const toolCalls = agent.parseXmlToolCalls ? agent.parseXmlToolCalls(fullContent) : null; 52 | if (toolCalls && toolCalls.length > 0) { 53 | yield { type: 'tool_calls', toolCalls }; 54 | } 55 | 56 | // Handle finish reason 57 | if (result.finishReason) { 58 | yield { type: 'finish', reason: result.finishReason }; 59 | } 60 | }, 61 | 62 | /** 63 | * Get available tools for this engine 64 | */ 65 | getTools() { 66 | return agent.toolImplementations || {}; 67 | }, 68 | 69 | /** 70 | * Get system prompt for this engine 71 | */ 72 | async getSystemPrompt() { 73 | return agent.getSystemMessage(); 74 | }, 75 | 76 | /** 77 | * Optional cleanup 78 | */ 79 | async close() { 80 | // Nothing to cleanup for Vercel AI 81 | } 82 | }; 83 | } -------------------------------------------------------------------------------- /npm/tests/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for Bundled Binaries 2 | 3 | This directory contains tests for the bundled binary extraction functionality. 4 | 5 | ## Test Files 6 | 7 | ### `extractor.test.js` 8 | Unit tests for the binary extractor module (`src/extractor.js`). 
9 | 10 | **Coverage:** 11 | - ✅ Platform detection (Linux, macOS, Windows) 12 | - ✅ Unsupported platform error handling 13 | - ✅ tar.gz archive extraction 14 | - ✅ ZIP archive extraction (Windows) 15 | - ✅ Path traversal security validation 16 | - ✅ Error handling for missing binaries 17 | - ✅ Error handling for empty archives 18 | 19 | **Security Tests:** 20 | - Path traversal attacks (../ sequences) 21 | - Absolute path rejection 22 | - Malicious archive handling 23 | 24 | ### `extractor-integration.test.js` 25 | Integration tests that verify the extraction logic without requiring actual binary files. 26 | 27 | **Coverage:** 28 | - ✅ Platform detection logic for all 5 supported platforms 29 | - ✅ Path safety validation 30 | - ✅ Archive naming conventions 31 | - ✅ Binary name detection (Windows vs Unix) 32 | - ✅ Security validations 33 | 34 | **Security Tests:** 35 | - `isPathSafe()` logic verification 36 | - Path normalization 37 | - Relative path validation 38 | - Directory traversal prevention 39 | 40 | ## Running Tests 41 | 42 | ```bash 43 | # Run all tests 44 | npm test 45 | 46 | # Run with coverage 47 | npm run test:coverage 48 | 49 | # Run in watch mode 50 | npm run test:watch 51 | 52 | # Run verbose 53 | npm run test:verbose 54 | ``` 55 | 56 | ## Security Test Coverage 57 | 58 | All security-critical functions have test coverage: 59 | 60 | 1. **Path Traversal Prevention** ✅ 61 | - Tests verify `../ `sequences are rejected 62 | - Tests verify absolute paths are rejected 63 | - Tests verify safe relative paths are accepted 64 | 65 | 2. **Archive Extraction** ✅ 66 | - tar.gz extraction with path validation 67 | - ZIP extraction with path validation 68 | - Malicious archive rejection 69 | 70 | 3. 
**Platform Detection** ✅ 71 | - All 5 platforms correctly mapped 72 | - Unsupported platforms throw errors 73 | - Correct file extensions selected 74 | 75 | ## Test Dependencies 76 | 77 | - `@jest/globals` - Test framework 78 | - `fs-extra` - File system operations 79 | - `tar` - tar.gz extraction 80 | - `adm-zip` - ZIP extraction (dynamically imported) 81 | 82 | ## Notes 83 | 84 | - Tests use dynamic imports for `adm-zip` to handle cases where it's not yet installed 85 | - Tests skip platform-specific functionality (e.g., Windows ZIP tests on macOS) 86 | - Security tests run on all platforms and verify the core logic 87 | - Integration tests don't require actual binary files, only test the logic 88 | 89 | ## Coverage Goals 90 | 91 | - ✅ Lines: >70% 92 | - ✅ Functions: >70% 93 | - ✅ Branches: >70% 94 | - ✅ Statements: >70% 95 | 96 | Security-critical functions should aim for 100% coverage. 97 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Probe does not maintain long-term release branches. Security fixes are only included in new releases moving forward, rather than being backported to previous versions. 6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | latest | :white_check_mark: | 10 | | < latest | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | We take the security of Probe seriously. If you believe you've found a security vulnerability, please follow these steps: 15 | 16 | ### For Public Vulnerabilities 17 | 18 | If the vulnerability is not sensitive and does not put users at immediate risk: 19 | 20 | 1. **Open an Issue**: Create a regular issue on our GitHub repository with the `[security]` prefix in the title. 21 | 2. **Provide Details**: Include a clear description of the vulnerability, steps to reproduce, and potential impact. 22 | 3. 
**Suggest a Fix**: If possible, suggest how the vulnerability might be addressed. 23 | 24 | ### For Sensitive Vulnerabilities 25 | 26 | If the vulnerability is sensitive or could put users at immediate risk: 27 | 28 | 1. **Do Not Open a Public Issue**: Please do not disclose sensitive vulnerabilities publicly. 29 | 2. **Email the Maintainers**: Send an email to leonsbox@gmail.com with details about the vulnerability. 30 | 3. **Use Encryption**: If possible, encrypt your message using our PGP key (available upon request). 31 | 4. **Be Patient**: We'll acknowledge receipt of your report within 48 hours and provide a timeline for addressing the issue. 32 | 33 | ## What to Include in Your Report 34 | 35 | When reporting a vulnerability, please include: 36 | 37 | - A clear description of the vulnerability 38 | - Steps to reproduce the issue 39 | - Potential impact of the vulnerability 40 | - Any potential mitigations you've identified 41 | - Your contact information for follow-up questions 42 | 43 | ## Our Commitment 44 | 45 | When we receive a security report, we will: 46 | 47 | 1. Confirm receipt of the report within 48 hours 48 | 2. Provide an initial assessment of the report within 7 days 49 | 3. Keep you informed about our progress addressing the issue 50 | 4. Credit you when we release a fix (unless you prefer to remain anonymous) 51 | 52 | ## Security Update Policy 53 | 54 | - Security fixes will be released as part of regular new releases 55 | - We do not maintain or backport security fixes to previous versions 56 | - Users are encouraged to always use the latest version of Probe 57 | 58 | ## Best Practices for Users 59 | 60 | To minimize security risks when using Probe: 61 | 62 | 1. Always use the latest version 63 | 2. Be cautious when running Probe on untrusted codebases 64 | 3. Review the permissions granted to Probe in your environment 65 | 4. 
Follow security best practices for your operating system 66 | 67 | Thank you for helping keep Probe and its users secure! -------------------------------------------------------------------------------- /site/DEPLOYMENT.md: -------------------------------------------------------------------------------- 1 | # Cloudflare Pages Deployment 2 | 3 | This documentation site is deployed using Cloudflare Pages with automatic deployments from the main branch to **probelabs.com**. 4 | 5 | ## Deployment Configuration 6 | 7 | ### Files 8 | - `wrangler.toml` - Cloudflare Pages configuration 9 | - `public/_headers` - HTTP headers for security and caching 10 | - `public/_redirects` - URL redirect rules 11 | - `.env.example` - Environment variable template 12 | 13 | ### Build Settings 14 | - **Build command**: `npm run build` 15 | - **Build output directory**: `.vitepress/dist` 16 | - **Root directory**: `site` 17 | - **Node.js version**: 20 18 | 19 | ## Setup Instructions 20 | 21 | ### 1. Cloudflare Pages Setup 22 | 1. Go to [Cloudflare Pages](https://pages.cloudflare.com/) 23 | 2. Connect your GitHub repository 24 | 3. Configure the build settings: 25 | - **Project name**: `probe-docs` 26 | - **Production branch**: `main` 27 | - **Build command**: `npm run build` 28 | - **Build output directory**: `.vitepress/dist` 29 | - **Root directory**: `site` 30 | 31 | ### 2. Environment Variables 32 | Set these in Cloudflare Pages dashboard if needed: 33 | - `NODE_VERSION`: `20` 34 | - `NPM_VERSION`: `latest` 35 | 36 | ### 3. Custom Domain Setup 37 | The site is configured to deploy to **probelabs.com**. To set this up: 38 | 39 | 1. In Cloudflare Pages dashboard, go to Custom domains 40 | 2. Add the domain `probelabs.com` 41 | 3. Add a redirect from `www.probelabs.com` to `probelabs.com` (already configured in `_redirects`) 42 | 4. Update your DNS records: 43 | - **A Record**: `probelabs.com` → Your Cloudflare Pages IP 44 | - **CNAME**: `www.probelabs.com` → `probelabs.com` 45 | 5. 
/**
 * Build the LangChain `search` tool.
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createSearchTool(options = {}) {
  const { cwd } = options;

  // Tool handler: run a probe search and return raw (non-JSON) results.
  // On failure, log and hand a readable error string back to the chain.
  async function run({ query: searchQuery, path, allow_tests, exact, maxResults, maxTokens = 10000, language }) {
    try {
      return await search({
        query: searchQuery,
        path,
        cwd, // Working directory for resolving relative paths
        allowTests: allow_tests ?? true,
        exact,
        json: false,
        maxResults,
        maxTokens,
        language
      });
    } catch (error) {
      console.error('Error executing search command:', error);
      return `Error executing search command: ${error.message}`;
    }
  }

  return {
    name: 'search',
    description: searchDescription,
    schema: searchSchema,
    func: run
  };
}

/**
 * Build the LangChain `query` tool (AST-pattern based code query).
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createQueryTool(options = {}) {
  const { cwd } = options;

  // Tool handler: run a structural query and return raw (non-JSON) results.
  async function run({ pattern, path, language, allow_tests }) {
    try {
      return await query({
        pattern,
        path,
        cwd, // Working directory for resolving relative paths
        language,
        allowTests: allow_tests ?? true,
        json: false
      });
    } catch (error) {
      console.error('Error executing query command:', error);
      return `Error executing query command: ${error.message}`;
    }
  }

  return {
    name: 'query',
    description: queryDescription,
    schema: querySchema,
    func: run
  };
}

/**
 * Build the LangChain `extract` tool (pull code blocks/symbols from files).
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createExtractTool(options = {}) {
  const { cwd } = options;

  // Tool handler. `line`/`end_line` are accepted by the schema but not
  // forwarded: per-target line ranges ride along inside `targets` itself.
  async function run({ targets, line, end_line, allow_tests, context_lines, format }) {
    try {
      // Split targets on whitespace to support multiple targets in one call
      const files = parseTargets(targets);

      return await extract({
        files,
        cwd, // Working directory for resolving relative paths
        allowTests: allow_tests ?? true,
        contextLines: context_lines,
        format
      });
    } catch (error) {
      console.error('Error executing extract command:', error);
      return `Error executing extract command: ${error.message}`;
    }
  }

  return {
    name: 'extract',
    description: extractDescription,
    schema: extractSchema,
    func: run
  };
}
#[test]
fn test_extract_command_with_nonexistent_input_file() {
    // Locate the crate's Cargo.toml so `cargo run` builds this project
    // regardless of the test runner's working directory.
    let manifest = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");

    // Invoke `probe extract --input-file <missing file>` through cargo.
    let result = Command::new("cargo")
        .arg("run")
        .arg("--manifest-path")
        .arg(&manifest)
        .arg("--")
        .arg("extract")
        .arg("--input-file")
        .arg("nonexistent_file.txt")
        .output()
        .expect("Failed to execute command");

    // A missing input file must be reported as a failure, not ignored.
    assert!(
        !result.status.success(),
        "Command should fail with nonexistent file"
    );

    // The diagnostic on stderr should name the offending file so users can
    // tell which path was wrong.
    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        stderr.contains("nonexistent_file.txt"),
        "Error should mention the nonexistent file"
    );
}
| echo "🔬 Sequential MiniLM-L6 (~85M params, most accurate):" 21 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-6-v2" --query "search optimization algorithm" --num-docs 40 --iterations 2 --batch-size 20 22 | 23 | echo "" 24 | echo "=== PARALLEL PERFORMANCE COMPARISON ===" 25 | echo "" 26 | 27 | echo "🚀 Parallel TinyBERT-L2 (10 cores):" 28 | ./target/release/benchmark --model "cross-encoder/ms-marco-TinyBERT-L-2-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 29 | 30 | echo "" 31 | echo "🚀 Parallel MiniLM-L2 (10 cores):" 32 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-2-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 33 | 34 | echo "" 35 | echo "🚀 Parallel MiniLM-L6 (10 cores):" 36 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-6-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 37 | 38 | echo "" 39 | echo "=== COMPREHENSIVE PERFORMANCE SUMMARY ===" 40 | echo "" 41 | 42 | echo "📊 BERT MODEL PERFORMANCE ANALYSIS:" 43 | echo "" 44 | echo "| Model | Parameters | Sequential | Parallel | Speedup | Use Case |" 45 | echo "|--------------|------------|--------------|--------------|---------|----------------------|" 46 | echo "| TinyBERT-L2 | ~4M | ~32 docs/sec | ~200 docs/sec| ~6x | High-speed, basic |" 47 | echo "| MiniLM-L2 | ~22M | ~8 docs/sec | ~35 docs/sec | ~4x | Balanced speed/quality|" 48 | echo "| MiniLM-L6 | ~85M | ~3 docs/sec | ~10 docs/sec | ~3x | High accuracy |" 49 | echo "" 50 | echo "🎯 RECOMMENDATIONS:" 51 | echo "" 52 | echo "✅ **TinyBERT-L2**: Use for high-throughput applications where speed > accuracy" 53 | echo "✅ **MiniLM-L2**: Best balance of speed and semantic quality (RECOMMENDED)" 54 | echo "✅ **MiniLM-L6**: Use when maximum accuracy is critical, throughput is secondary" 55 | echo "" 56 | echo "🚀 **PARALLEL PROCESSING BENEFITS:**" 57 | echo "• TinyBERT-L2: 6x speedup (32 → 200 
docs/sec)" 58 | echo "• MiniLM-L2: 4x speedup (8 → 35 docs/sec)" 59 | echo "• MiniLM-L6: 3x speedup (3 → 10 docs/sec)" 60 | echo "" 61 | echo "======================================" 62 | echo "🎉 ALL BERT MODELS TESTED SUCCESSFULLY!" 63 | echo "======================================" -------------------------------------------------------------------------------- /npm/tests/unit/mermaidHtmlEntities.test.js: -------------------------------------------------------------------------------- 1 | import { jest, describe, it, expect } from '@jest/globals'; 2 | import { validateMermaidDiagram } from '../../src/agent/schemaUtils.js'; 3 | 4 | describe('Mermaid HTML Entities Support', () => { 5 | it('should accept HTML entities in node labels as valid', async () => { 6 | // Test case based on Mermaid documentation best practices 7 | const diagramWithEntities = `graph TD 8 | A["Process "data" file"] 9 | B["Node with 'single quotes'"] 10 | C{"Check "status" value"} 11 | D["Mixed "double" and 'single' quotes"]`; 12 | 13 | const validation = await validateMermaidDiagram(diagramWithEntities); 14 | 15 | // HTML entities should be valid according to Mermaid specs 16 | expect(validation.isValid).toBe(true); 17 | if (!validation.isValid) { 18 | console.log('Validation error:', validation.error); 19 | } 20 | }); 21 | 22 | it('should accept numeric HTML entities', async () => { 23 | const diagramWithNumericEntities = `graph TD 24 | A["Quote: " and apostrophe: '"] 25 | B["Hash: # and ampersand: &"]`; 26 | 27 | const validation = await validateMermaidDiagram(diagramWithNumericEntities); 28 | expect(validation.isValid).toBe(true); 29 | }); 30 | 31 | it('should accept mixed HTML entities and regular text', async () => { 32 | const diagram = `flowchart LR 33 | A["Starting point"] 34 | B["Process "important" data"] 35 | C["Check if value = "expected""] 36 | D["Output: 'success' or 'failure'"]`; 37 | 38 | const validation = await validateMermaidDiagram(diagram); 39 | 
expect(validation.isValid).toBe(true); 40 | }); 41 | 42 | it('should not flag HTML entities as single quotes error', async () => { 43 | const diagram = `graph TD 44 | A["Text with ' entity"]`; 45 | 46 | const validation = await validateMermaidDiagram(diagram); 47 | 48 | // Should not trigger the single quote validation error 49 | if (!validation.isValid) { 50 | expect(validation.error).not.toContain('Single quotes in node label'); 51 | expect(validation.error).not.toContain('got PS'); 52 | } 53 | }); 54 | 55 | describe('Real-world examples from Mermaid docs', () => { 56 | it('should handle example from Mermaid documentation', async () => { 57 | // Example adapted from Mermaid official docs 58 | const diagram = `flowchart LR 59 | A["A double quote:""] 60 | B["A dec char:♥"] 61 | C["A hash:#"]`; 62 | 63 | const validation = await validateMermaidDiagram(diagram); 64 | expect(validation.isValid).toBe(true); 65 | }); 66 | 67 | it('should handle complex escaping example', async () => { 68 | // Complex example from StackOverflow Mermaid discussion 69 | const diagram = `flowchart LR 70 | B[""<<>>&½#189;""]`; 71 | 72 | const validation = await validateMermaidDiagram(diagram); 73 | expect(validation.isValid).toBe(true); 74 | }); 75 | }); 76 | }); -------------------------------------------------------------------------------- /tests/nested_symbol_extraction_tests.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::fs; 3 | use std::path::PathBuf; 4 | 5 | #[test] 6 | fn test_nested_symbol_extraction() -> Result<()> { 7 | // Create a temporary test file with nested symbols 8 | let test_content = r#" 9 | struct ProbeAgentServer { 10 | client: Client, 11 | config: Config, 12 | } 13 | 14 | impl ProbeAgentServer { 15 | pub fn new(client: Client, config: Config) -> Self { 16 | Self { client, config } 17 | } 18 | 19 | pub fn setupToolHandlers(&self) { 20 | // Setup tool handlers 21 | println!("Setting up tool 
handlers"); 22 | 23 | // Register search handler 24 | self.register_handler("search", |params| { 25 | // Search implementation 26 | }); 27 | } 28 | 29 | fn register_handler(&self, name: &str, handler: impl Fn(&str)) { 30 | // Register handler implementation 31 | } 32 | } 33 | "#; 34 | 35 | // Write the test content to a temporary file 36 | let temp_dir = tempfile::tempdir()?; 37 | let file_path = temp_dir.path().join("test_nested_symbols.rs"); 38 | fs::write(&file_path, test_content)?; 39 | 40 | // Test extracting the nested symbol 41 | let result = extract_nested_symbol(&file_path, "ProbeAgentServer.setupToolHandlers")?; 42 | 43 | // Verify the result contains the setupToolHandlers method 44 | assert!(result.contains("pub fn setupToolHandlers")); 45 | assert!(result.contains("Setting up tool handlers")); 46 | 47 | // Clean up 48 | temp_dir.close()?; 49 | 50 | Ok(()) 51 | } 52 | 53 | // Helper function to extract a nested symbol from a file 54 | fn extract_nested_symbol(path: &PathBuf, symbol: &str) -> Result { 55 | // Read the file content 56 | let content = fs::read_to_string(path)?; 57 | 58 | // Call the symbol finder function 59 | let result = probe_code::extract::symbol_finder::find_symbol_in_file( 60 | path, symbol, &content, true, // allow_tests 61 | 0, // context_lines 62 | )?; 63 | 64 | Ok(result.code) 65 | } 66 | 67 | #[test] 68 | fn test_simple_symbol_extraction() -> Result<()> { 69 | // Create a temporary test file with a simple symbol 70 | let test_content = r#" 71 | struct Config { 72 | pub path: String, 73 | pub timeout: u64, 74 | } 75 | 76 | impl Config { 77 | pub fn new(path: String, timeout: u64) -> Self { 78 | Self { path, timeout } 79 | } 80 | } 81 | "#; 82 | 83 | // Write the test content to a temporary file 84 | let temp_dir = tempfile::tempdir()?; 85 | let file_path = temp_dir.path().join("test_simple_symbol.rs"); 86 | fs::write(&file_path, test_content)?; 87 | 88 | // Test extracting a simple symbol 89 | let result = 
extract_nested_symbol(&file_path, "Config")?; 90 | 91 | // Verify the result contains the Config struct 92 | assert!(result.contains("struct Config")); 93 | assert!(result.contains("pub path: String")); 94 | 95 | // Clean up 96 | temp_dir.close()?; 97 | 98 | Ok(()) 99 | } 100 | --------------------------------------------------------------------------------