├── .windsurfrules ├── npm ├── bin │ ├── .gitkeep │ ├── binaries │ │ ├── .gitkeep │ │ └── README.md │ ├── .gitignore │ └── README.md ├── src │ ├── agent │ │ ├── hooks │ │ │ └── index.js │ │ ├── storage │ │ │ ├── index.js │ │ │ ├── InMemoryStorageAdapter.js │ │ │ └── StorageAdapter.js │ │ ├── acp │ │ │ └── index.js │ │ ├── shared │ │ │ └── Session.js │ │ ├── mockProvider.js │ │ ├── mcp │ │ │ └── index.js │ │ └── engines │ │ │ ├── vercel.js │ │ │ └── enhanced-vercel.js │ ├── cli.js │ ├── tools │ │ ├── index.js │ │ └── langchain.js │ └── utils │ │ ├── symlink-utils.js │ │ └── path-validation.js ├── tsconfig.json ├── tests │ ├── unit │ │ ├── system-prompt.test.js │ │ ├── backtickAutoFix.test.js │ │ ├── types-probe-agent-options.test.js │ │ ├── extract-content.test.js │ │ ├── mermaidInfiniteLoopFix.test.js │ │ └── mermaidHtmlEntities.test.js │ ├── nestedQuoteFix.test.js │ ├── setup.js │ └── README.md ├── scripts │ ├── build-mcp.cjs │ └── build-agent.cjs ├── jest.config.js ├── test-grep.js ├── test-grep-simplified.js └── test-codex-e2e.js ├── examples ├── chat │ ├── simple-traces.jsonl │ ├── .gitignore │ ├── logo.png │ ├── bin │ │ └── probe-chat.js │ ├── .dockerignore │ ├── npm │ │ ├── LICENSE │ │ ├── .npmignore │ │ └── package.json │ ├── test-simple-tracing.js │ ├── test-image-spans.js │ ├── test-chat-tracing.js │ ├── test-direct-function.js │ ├── test-github-context.txt │ ├── implement │ │ ├── backends │ │ │ └── registry.js │ │ └── core │ │ │ └── timeouts.js │ ├── cancelRequest.js │ └── auth.js ├── reranker │ ├── src │ │ ├── lib.rs │ │ └── bin │ │ │ └── benchmark.rs │ ├── rust_bert_test │ │ ├── Cargo.toml │ │ └── README.md │ ├── requirements.txt │ ├── Cargo.toml │ ├── test_bert_results.sh │ ├── simple_test.py │ ├── MODELS.md │ ├── MODEL_COMPARISON.md │ ├── download_models.sh │ ├── test_parallel_performance.sh │ ├── test_cross_encoder.sh │ ├── DEBUG_OUTPUT_ANALYSIS.md │ └── test_all_models.sh └── cache_demo.rs ├── logo.png ├── test-api-key.sh ├── site ├── public │ ├── 
logo.png │ ├── moon.png │ ├── sun.png │ ├── saturn.png │ ├── logo_padded.png │ ├── _redirects │ ├── icons │ │ └── privacy-icon.svg │ └── _headers ├── .vitepress │ ├── components │ │ ├── FeatureSection.vue.d.ts │ │ ├── TestComponent.vue │ │ └── FullWidthFeatureSection.vue │ └── theme │ │ ├── components │ │ ├── BlogPostLayout.vue │ │ ├── FeatureSection.vue │ │ ├── CommandExample.vue │ │ ├── FeatureList.vue │ │ ├── CodeEditor.vue │ │ └── Feature.vue │ │ ├── index.js │ │ ├── blog.css │ │ └── home.css ├── blog.md ├── .env.example ├── wrangler.toml ├── contributing │ └── README.md ├── use-cases │ └── README.md ├── package.json └── DEPLOYMENT.md ├── src ├── mod.rs ├── language │ ├── common.rs │ ├── mod.rs │ ├── language_trait.rs │ ├── block_handling.rs │ ├── factory.rs │ ├── c.rs │ ├── cpp.rs │ ├── php.rs │ ├── java.rs │ └── ruby.rs ├── search │ ├── token_utils.rs │ ├── search_options.rs │ ├── mod.rs │ ├── timeout.rs │ ├── term_exceptions.rs │ ├── test_patterns.rs │ └── limits.rs ├── version.rs └── simd_test.rs ├── tests ├── fixtures │ └── user │ │ └── AssemblyInfo.cs ├── mocks │ ├── test_ip_whitelist.go │ ├── test_object.js │ └── test_struct.go ├── property_tests.proptest-regressions ├── common.rs ├── test_file.rs ├── test_tokenize.rs ├── schemas │ └── xml_output_schema.xsd ├── lib_usage.rs ├── outline_keyword_preservation_test.rs ├── extract_input_file_tests.rs └── nested_symbol_extraction_tests.rs ├── test-probe-implementation └── hello.js ├── codex-config └── config.toml ├── .githooks ├── pre-commit-vow └── post-commit ├── Cross.toml ├── .claude ├── settings.json └── commands │ └── performance-review.md ├── .cargo └── config.toml ├── test_data └── test_nested_struct.go ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── visor.yml │ ├── README-docker.md │ └── vitepress-gh-pages.yml.disabled ├── result1.txt ├── .dockerignore ├── result2.txt ├── Dockerfile ├── docker-compose.yml ├── scripts └── claude-hook-wrapper.sh ├── 
.prompts └── engineer.md ├── .gitignore ├── .roomodes └── SECURITY.md /.windsurfrules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /npm/bin/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/simple-traces.jsonl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/chat/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | .env -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/logo.png -------------------------------------------------------------------------------- /test-api-key.sh: -------------------------------------------------------------------------------- 1 | export ANTHROPIC_API_KEY="your-actual-api-key-here" 2 | -------------------------------------------------------------------------------- /site/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/logo.png -------------------------------------------------------------------------------- /site/public/moon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/moon.png -------------------------------------------------------------------------------- /site/public/sun.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/buger/probe/HEAD/site/public/sun.png -------------------------------------------------------------------------------- /examples/chat/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/examples/chat/logo.png -------------------------------------------------------------------------------- /npm/src/agent/hooks/index.js: -------------------------------------------------------------------------------- 1 | export { HookManager, HOOK_TYPES } from './HookManager.js'; 2 | -------------------------------------------------------------------------------- /site/public/saturn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/saturn.png -------------------------------------------------------------------------------- /site/public/logo_padded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buger/probe/HEAD/site/public/logo_padded.png -------------------------------------------------------------------------------- /npm/bin/binaries/.gitkeep: -------------------------------------------------------------------------------- 1 | # This file ensures the binaries directory is tracked by git even when empty 2 | -------------------------------------------------------------------------------- /examples/reranker/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod reranker; 2 | pub mod benchmark; 3 | pub mod bert_simulator; 4 | pub mod parallel_reranker; -------------------------------------------------------------------------------- /src/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod models; 2 | pub mod language; 3 | pub mod search; 4 | pub mod cli; 5 | pub mod server; 6 | pub mod ranking; 7 | 
-------------------------------------------------------------------------------- /npm/bin/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all files in this directory 2 | * 3 | # Except these files 4 | !.gitignore 5 | !.gitkeep 6 | !README.md 7 | !probe 8 | -------------------------------------------------------------------------------- /tests/fixtures/user/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft. All rights reserved. 2 | 3 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] -------------------------------------------------------------------------------- /npm/src/agent/storage/index.js: -------------------------------------------------------------------------------- 1 | export { StorageAdapter } from './StorageAdapter.js'; 2 | export { InMemoryStorageAdapter } from './InMemoryStorageAdapter.js'; 3 | -------------------------------------------------------------------------------- /test-probe-implementation/hello.js: -------------------------------------------------------------------------------- 1 | // A simple hello world function 2 | function hello(name) { 3 | console.log("Hello, " + name); 4 | } 5 | 6 | // Test the function 7 | hello("World"); -------------------------------------------------------------------------------- /site/.vitepress/components/FeatureSection.vue.d.ts: -------------------------------------------------------------------------------- 1 | declare module '@theme/components/FeatureSection.vue' { 2 | import { DefineComponent } from 'vue' 3 | const component: DefineComponent<{}, {}, any> 4 | export default component 5 | } -------------------------------------------------------------------------------- /codex-config/config.toml: -------------------------------------------------------------------------------- 1 | # IMPORTANT: the top-level key is `mcp_servers` rather than `mcpServers`. 
2 | [mcp_servers.code-search] 3 | command = "npx" 4 | args = ["-y", "@probelabs/probe", "mcp", "server"] 5 | # env = { "API_KEY" = "value" } 6 | 7 | -------------------------------------------------------------------------------- /site/blog.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Probe Blog 3 | description: Latest news, updates, and insights about Probe - the AI-friendly code search tool 4 | --- 5 | 6 | # Redirecting to Blog... 7 | 8 | Please visit the [Probe Blog](/blog/) for the latest news and updates. -------------------------------------------------------------------------------- /npm/src/agent/acp/index.js: -------------------------------------------------------------------------------- 1 | // ACP (Agent Client Protocol) module exports 2 | export { ACPServer } from './server.js'; 3 | export { ACPConnection } from './connection.js'; 4 | export { ACPToolCall, ACPToolManager } from './tools.js'; 5 | export * from './types.js'; -------------------------------------------------------------------------------- /.githooks/pre-commit-vow: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Vow - AI accountability gate 3 | # This hook runs at the end of the pre-commit pipeline 4 | 5 | npx @probelabs/vow check 6 | if [ $? -ne 0 ]; then 7 | echo "Vow check failed. Commit aborted." 8 | exit 1 9 | fi 10 | -------------------------------------------------------------------------------- /Cross.toml: -------------------------------------------------------------------------------- 1 | # Cross-compilation configuration for probe 2 | # This configures the cross tool for ARM64 Linux builds 3 | 4 | [build.env] 5 | passthrough = [ 6 | "CARGO_INCREMENTAL", 7 | "CARGO_TERM_COLOR", 8 | ] 9 | 10 | # Cross uses prebuilt images for musl targets; no per-target overrides needed. 
11 | -------------------------------------------------------------------------------- /examples/reranker/rust_bert_test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-bert-reranker-test" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rust-bert = "0.21" 8 | anyhow = "1.0" 9 | tokio = { version = "1.0", features = ["full"] } 10 | 11 | # For downloading model files 12 | tch = "0.13.0" -------------------------------------------------------------------------------- /site/.env.example: -------------------------------------------------------------------------------- 1 | # Environment variables for Cloudflare Pages deployment 2 | # Copy this to .env and configure as needed 3 | 4 | # Cloudflare configuration (optional) 5 | # CLOUDFLARE_ACCOUNT_ID=your-account-id 6 | # CLOUDFLARE_API_TOKEN=your-api-token 7 | 8 | # Build configuration 9 | NODE_VERSION=20 10 | NPM_VERSION=latest -------------------------------------------------------------------------------- /.claude/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "hooks": { 3 | "Stop": [ 4 | { 5 | "hooks": [ 6 | { 7 | "type": "command", 8 | "command": "$CLAUDE_PROJECT_DIR/scripts/claude-hook-wrapper.sh $CLAUDE_PROJECT_DIR/.githooks/pre-commit" 9 | } 10 | ] 11 | } 12 | ] 13 | } 14 | } -------------------------------------------------------------------------------- /examples/chat/bin/probe-chat.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * @probelabs/probe-chat CLI 5 | * Command-line interface for Probe code search chat 6 | * 7 | * This is a thin wrapper around the main functionality in index.js 8 | */ 9 | 10 | import { main } from '../index.js'; 11 | 12 | // Execute the main function 13 | main(); -------------------------------------------------------------------------------- 
/.githooks/post-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Post-commit hook to provide feedback after a successful commit 4 | # Installed by 'make install-hooks' 5 | 6 | # Colors for better output 7 | GREEN='\033[0;32m' 8 | NC='\033[0m' # No Color 9 | 10 | echo "${GREEN}Commit successful! All checks passed.${NC}" 11 | echo "${GREEN}Remember to push your changes.${NC}" 12 | -------------------------------------------------------------------------------- /tests/mocks/test_ip_whitelist.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | // IPWhiteListMiddleware is a middleware that checks if the client's IP is in the whitelist 4 | type IPWhiteListMiddleware struct { 5 | Whitelist []string 6 | } 7 | 8 | // Name returns the name of the middleware 9 | func (i *IPWhiteListMiddleware) Name() string { 10 | return "IPWhiteListMiddleware" 11 | } -------------------------------------------------------------------------------- /examples/reranker/requirements.txt: -------------------------------------------------------------------------------- 1 | # Requirements for cross-encoder testing and debugging 2 | torch>=1.9.0 3 | transformers>=4.20.0 4 | sentence-transformers>=2.2.0 5 | numpy>=1.21.0 6 | 7 | # Optional but recommended for better performance 8 | tokenizers>=0.13.0 9 | 10 | # For additional debugging and analysis 11 | matplotlib>=3.5.0 12 | seaborn>=0.11.0 13 | pandas>=1.3.0 -------------------------------------------------------------------------------- /site/public/_redirects: -------------------------------------------------------------------------------- 1 | # Redirect rules for Cloudflare Pages 2 | 3 | # Redirect www to main domain 4 | https://www.probelabs.com/* https://probelabs.com/:splat 301 5 | 6 | # Handle clean URLs - VitePress already generates clean URLs 7 | # but this ensures fallback behavior 8 | /docs/* /404.html 404 9 | 
10 | # Fallback for any 404s to custom 404 page 11 | /* /404.html 404 -------------------------------------------------------------------------------- /site/wrangler.toml: -------------------------------------------------------------------------------- 1 | # Cloudflare Pages configuration for Probe documentation site 2 | name = "probe-docs" 3 | compatibility_date = "2024-01-15" 4 | 5 | # Pages project configuration 6 | pages_build_output_dir = ".vitepress/dist" 7 | 8 | # Note: Custom domains (probelabs.com) are configured via Cloudflare Dashboard 9 | # Routes configuration is not supported in wrangler.toml for Pages -------------------------------------------------------------------------------- /site/public/icons/privacy-icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /site/contributing/README.md: -------------------------------------------------------------------------------- 1 | # Contributing to Probe 2 | 3 | This directory contains guides for contributors to the Probe project, including: 4 | 5 | - Documentation Maintenance 6 | - Documentation Cross-References 7 | - Documentation Structure 8 | 9 | For general contribution guidelines, please see the [Contributing Guide](https://github.com/probelabs/probe/blob/main/CONTRIBUTING.md) in the main repository. 
-------------------------------------------------------------------------------- /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | # Target-specific overrides for Windows MSVC 2 | [target.x86_64-pc-windows-msvc] 3 | rustflags = ["-C", "target-feature=+crt-static", "-C", "link-args=/DEBUG:NONE /NOLOGO"] 4 | 5 | [target.i686-pc-windows-msvc] 6 | rustflags = ["-C", "target-feature=+crt-static", "-C", "link-args=/DEBUG:NONE /NOLOGO"] 7 | 8 | # Configuration for Windows builds 9 | [target.'cfg(target_os = "windows")'] 10 | # General Windows configuration -------------------------------------------------------------------------------- /src/language/common.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use tree_sitter::Node; 3 | 4 | /// Helper function to collect all node types in the AST 5 | pub fn collect_node_types(node: Node, node_types: &mut HashSet) { 6 | node_types.insert(node.kind().to_string()); 7 | 8 | let mut cursor = node.walk(); 9 | for child in node.children(&mut cursor) { 10 | collect_node_types(child, node_types); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /site/use-cases/README.md: -------------------------------------------------------------------------------- 1 | # Use Cases & Workflows 2 | 3 | This directory contains scenario-based documentation for different Probe usage patterns. 
4 | 5 | - **Web Interface**: Code as a source of truth for product functionality 6 | - **AI Code Editors & MCP**: Integration with AI-powered code editors 7 | - **CLI for AI Workflows**: Using Probe in the command line for AI-assisted workflows 8 | - **Developers & SDK**: Building custom tools with the Node.js SDK -------------------------------------------------------------------------------- /npm/bin/README.md: -------------------------------------------------------------------------------- 1 | # Probe Binary Directory 2 | 3 | This directory is used to store the downloaded probe binary. 4 | 5 | The binary is automatically downloaded during package installation. 6 | If you encounter any issues with the download, you can manually place the probe binary in this directory. 7 | 8 | Binary name should be: 9 | - `probe` (on Linux/macOS) 10 | - `probe.exe` (on Windows) 11 | 12 | You can download the binary from: https://github.com/probelabs/probe/releases 13 | -------------------------------------------------------------------------------- /test_data/test_nested_struct.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | func HandleNotFound(c *gin.Context) { 10 | c.JSON(http.StatusNotFound, ErrorResponse{ 11 | Errors: []struct { 12 | Title string `json:"title"` 13 | Detail string `json:"detail"` 14 | }{{Title: "Not Found", Detail: "Model price not found"}}, 15 | }) 16 | } 17 | 18 | type ErrorResponse struct { 19 | Errors interface{} `json:"errors"` 20 | } 21 | 22 | func main() { 23 | r := gin.Default() 24 | r.NoRoute(HandleNotFound) 25 | r.Run() 26 | } 27 | -------------------------------------------------------------------------------- /src/search/token_utils.rs: -------------------------------------------------------------------------------- 1 | use std::sync::OnceLock; 2 | use tiktoken_rs::p50k_base; 3 | use tiktoken_rs::CoreBPE; 4 | 5 | 
/// Returns a reference to the tiktoken tokenizer 6 | pub fn get_tokenizer() -> &'static CoreBPE { 7 | static TOKENIZER: OnceLock = OnceLock::new(); 8 | TOKENIZER.get_or_init(|| p50k_base().unwrap()) 9 | } 10 | 11 | /// Helper function to count tokens in a string using tiktoken (same tokenizer as GPT models) 12 | pub fn count_tokens(text: &str) -> usize { 13 | let tokenizer = get_tokenizer(); 14 | tokenizer.encode_with_special_tokens(text).len() 15 | } 16 | -------------------------------------------------------------------------------- /examples/chat/.dockerignore: -------------------------------------------------------------------------------- 1 | # Node.js 2 | node_modules/ 3 | npm-debug.log* 4 | yarn-debug.log* 5 | yarn-error.log* 6 | .npm 7 | .yarn 8 | 9 | # Logs 10 | *.log 11 | logs/ 12 | *.tmp 13 | 14 | # Test files (we'll copy them explicitly if needed) 15 | test/ 16 | *.test.js 17 | test-*.js 18 | 19 | # Development files 20 | .gitignore 21 | .git/ 22 | .github/ 23 | .vscode/ 24 | .idea/ 25 | 26 | # Temporary files 27 | probe-debug.txt 28 | simple-traces.jsonl 29 | *.cache 30 | 31 | # Documentation (keep only essential ones) 32 | TRACING.md 33 | README.md 34 | 35 | # OS generated files 36 | .DS_Store 37 | Thumbs.db -------------------------------------------------------------------------------- /npm/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "moduleResolution": "node", 6 | "lib": ["ES2020"], 7 | "outDir": "./build", 8 | "rootDir": "./src", 9 | "strict": true, 10 | "esModuleInterop": true, 11 | "allowSyntheticDefaultImports": true, 12 | "skipLibCheck": true, 13 | "forceConsistentCasingInFileNames": true, 14 | "resolveJsonModule": true, 15 | "allowJs": true, 16 | "declaration": true, 17 | "declarationMap": true 18 | }, 19 | "include": ["src/**/*"], 20 | "exclude": ["node_modules", "build", "bin"] 21 | } 
-------------------------------------------------------------------------------- /site/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "probe-docs", 3 | "version": "1.0.0", 4 | "description": "Documentation for Probe - AI-friendly code search tool", 5 | "scripts": { 6 | "docs:dev": "vitepress dev", 7 | "docs:build": "vitepress build", 8 | "docs:preview": "vitepress preview", 9 | "dev": "vitepress dev", 10 | "build": "vitepress build", 11 | "preview": "vitepress preview" 12 | }, 13 | "dependencies": { 14 | "highlight.js": "^11.11.1", 15 | "vitepress": "^1.6.3" 16 | }, 17 | "devDependencies": { 18 | "@vuepress/plugin-shiki": "^2.0.0-rc.83", 19 | "markdown-it": "^14.1.0" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tests/property_tests.proptest-regressions: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc 4389d9e5a829d147cb11cb4b5182e75a0741f36823f0ade724d5f19102e93dcf # shrinks to query = "'a?" 8 | cc 8553eb31be6672c0c6fd031bee95f520d4bb55fec15e8474d10a3c5b5ba8b4c4 # shrinks to query = "[" 9 | cc 6a5d367e62d691869428703bc771f64a21557cab6f6ae6837e8d2f179d1efe0e # shrinks to docs = [""], query = "" 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /site/public/_headers: -------------------------------------------------------------------------------- 1 | # Global headers for all files 2 | /* 3 | X-Frame-Options: DENY 4 | X-Content-Type-Options: nosniff 5 | Referrer-Policy: strict-origin-when-cross-origin 6 | X-XSS-Protection: 1; mode=block 7 | 8 | # Cache static assets for 1 year 9 | /assets/* 10 | Cache-Control: public, max-age=31536000, immutable 11 | 12 | # Cache images for 1 week 13 | /*.png 14 | Cache-Control: public, max-age=604800 15 | 16 | /*.jpg 17 | Cache-Control: public, max-age=604800 18 | 19 | /*.svg 20 | Cache-Control: public, max-age=604800 21 | 22 | # Cache main page for 1 hour 23 | / 24 | Cache-Control: public, max-age=3600 25 | 26 | # Cache other HTML files for 1 hour 27 | /*.html 28 | Cache-Control: public, max-age=3600 -------------------------------------------------------------------------------- /examples/reranker/src/bin/benchmark.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use clap::Parser; 3 | use bert_reranker::benchmark::{BenchmarkArgs, run_benchmark, print_document_stats, collect_source_files}; 4 | 5 | #[tokio::main] 6 | async fn main() -> Result<()> { 7 | let args = BenchmarkArgs::parse(); 8 | 9 | println!("🚀 BERT Reranker Performance Benchmark"); 10 | println!("======================================"); 11 | 12 | // Collect documents first to show 
stats 13 | let documents = collect_source_files(&args)?; 14 | print_document_stats(&documents); 15 | 16 | // Run the benchmark 17 | let result = run_benchmark(args).await?; 18 | 19 | // Print results 20 | result.print_summary(); 21 | 22 | Ok(()) 23 | } -------------------------------------------------------------------------------- /result1.txt: -------------------------------------------------------------------------------- 1 | Pattern: yaml workflow agent multi-agent user input 2 | Path: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties 3 | Options: Reranker: bm25, No block merging 4 | Using BM25 ranking (Okapi BM25 algorithm) 5 | Search completed in 34.73ms 6 | 7 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 8 | Lines: 1-4 9 | ```cs 10 | 11 | ``` 12 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 13 | Lines: 1-1 14 | ```cs 15 | // Copyright (c) Microsoft. All rights reserved. 
16 | ``` 17 | Found 2 search results 18 | Total bytes returned: 51 19 | Total tokens returned: 14 20 | -------------------------------------------------------------------------------- /examples/reranker/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bert-reranker" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [[bin]] 7 | name = "reranker" 8 | path = "src/main.rs" 9 | 10 | [[bin]] 11 | name = "demo" 12 | path = "src/demo.rs" 13 | 14 | [[bin]] 15 | name = "benchmark" 16 | path = "src/bin/benchmark.rs" 17 | 18 | [dependencies] 19 | candle-core = "0.8" 20 | candle-nn = "0.8" 21 | candle-transformers = "0.8" 22 | tokenizers = "0.20" 23 | anyhow = "1.0" 24 | clap = { version = "4.0", features = ["derive"] } 25 | serde = { version = "1.0", features = ["derive"] } 26 | serde_json = "1.0" 27 | hf-hub = { version = "0.3", features = ["tokio"] } 28 | safetensors = "0.4" 29 | tokio = { version = "1.0", features = ["full"] } 30 | walkdir = "2.4" 31 | rayon = "1.8" 32 | parking_lot = "0.12" 33 | tempfile = "3.8" -------------------------------------------------------------------------------- /examples/chat/npm/LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2024 Leonid Bugaev 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /src/search/search_options.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | /// Options for performing a search 4 | pub struct SearchOptions<'a> { 5 | pub path: &'a Path, 6 | pub queries: &'a [String], 7 | pub files_only: bool, 8 | pub custom_ignores: &'a [String], 9 | pub exclude_filenames: bool, 10 | pub reranker: &'a str, 11 | #[allow(dead_code)] 12 | pub frequency_search: bool, 13 | pub exact: bool, 14 | pub language: Option<&'a str>, 15 | pub max_results: Option, 16 | pub max_bytes: Option, 17 | pub max_tokens: Option, 18 | pub allow_tests: bool, 19 | pub no_merge: bool, 20 | pub merge_threshold: Option, 21 | pub dry_run: bool, 22 | pub session: Option<&'a str>, 23 | pub timeout: u64, 24 | pub question: Option<&'a str>, 25 | pub no_gitignore: bool, 26 | } 27 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git files 2 | .git/ 3 | .github/ 4 | .gitignore 5 | .githooks/ 6 | 7 | # Build artifacts 8 | target/ 9 | Cargo.lock 10 | 11 | # Documentation 12 | *.md 13 | !README.md 14 | !LICENSE 15 | !ABOUT.MD 16 | docs/ 17 | site/ 18 | 19 | # IDE and editor files 20 | .vscode/ 21 | .idea/ 22 | *.swp 23 | *.swo 24 | *~ 25 | .DS_Store 26 | 27 | # Test files 28 | tests/ 29 | test_cases/ 30 | *.test.* 31 | 32 | # Scripts and CI 33 | scripts/ 34 | .claude/ 35 | 36 | # Node.js artifacts (for examples/chat) 37 | node_modules/ 38 | npm-debug.log* 39 | 
yarn-debug.log* 40 | yarn-error.log* 41 | .npm/ 42 | .yarn/ 43 | 44 | # Examples (but keep benchmarks - needed for Cargo.toml validation) 45 | examples/ 46 | !examples/chat/ 47 | 48 | # MCP directories 49 | mcp-agent/ 50 | 51 | # NPM package directory 52 | npm/ 53 | 54 | # Temporary files 55 | *.tmp 56 | *.log 57 | *.cache -------------------------------------------------------------------------------- /tests/common.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::process::Command; 3 | 4 | pub struct TestContext; 5 | 6 | impl Default for TestContext { 7 | fn default() -> Self { 8 | Self::new() 9 | } 10 | } 11 | 12 | impl TestContext { 13 | pub fn new() -> Self { 14 | TestContext 15 | } 16 | 17 | pub fn run_probe(&self, args: &[&str]) -> Result { 18 | let output = Command::new("cargo") 19 | .args(["run", "--"]) 20 | .args(args) 21 | .output()?; 22 | 23 | if !output.status.success() { 24 | anyhow::bail!( 25 | "Command failed with status {}: {}", 26 | output.status, 27 | String::from_utf8_lossy(&output.stderr) 28 | ); 29 | } 30 | 31 | Ok(String::from_utf8_lossy(&output.stdout).to_string()) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/mocks/test_object.js: -------------------------------------------------------------------------------- 1 | // Example JavaScript object with properties 2 | 3 | const user = { 4 | id: 1, 5 | name: "John Smith", 6 | email: "john.smith@example.com", 7 | profile: { 8 | age: 30, 9 | occupation: "Software Engineer", 10 | skills: ["JavaScript", "TypeScript", "React", "Node.js"] 11 | }, 12 | isActive: true, 13 | lastLogin: new Date("2023-01-01") 14 | }; 15 | 16 | // Function to display user information 17 | function displayUserInfo(user) { 18 | console.log(`User: ${user.name} (ID: ${user.id})`); 19 | console.log(`Email: ${user.email}`); 20 | console.log(`Occupation: ${user.profile.occupation}`); 21 | 
console.log(`Skills: ${user.profile.skills.join(", ")}`); 22 | console.log(`Active: ${user.isActive ? "Yes" : "No"}`); 23 | console.log(`Last Login: ${user.lastLogin.toLocaleDateString()}`); 24 | } 25 | 26 | // Call the function 27 | displayUserInfo(user); 28 | -------------------------------------------------------------------------------- /site/.vitepress/components/TestComponent.vue: -------------------------------------------------------------------------------- 1 | 17 | 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /tests/test_file.rs: -------------------------------------------------------------------------------- 1 | // This is a test file for probe 2 | 3 | #[allow(dead_code)] 4 | fn function1() { 5 | println!("This is function 1"); 6 | // Some code here 7 | let x = 10; 8 | let y = 20; 9 | println!("Sum: {}", x + y); 10 | } 11 | 12 | // This comment is between function1 and function2 13 | // Only a few lines of separation 14 | 15 | #[allow(dead_code)] 16 | fn function2() { 17 | println!("This is function 2"); 18 | // Some code here 19 | let a = 30; 20 | let b = 40; 21 | println!("Product: {}", a * b); 22 | } 23 | 24 | // This is a larger gap between functions 25 | 26 | // More comments 27 | // More comments 28 | // More comments 29 | // More comments 30 | // More comments 31 | // More comments 32 | // More comments 33 | // More comments 34 | // More comments 35 | // More comments 36 | // More comments 37 | 38 | #[allow(dead_code)] 39 | fn function3() { 40 | println!("This is function 3"); 41 | // Some code here 42 | let c = 50; 43 | let d = 60; 44 | println!("Difference: {}", d - c); 45 | } 46 | -------------------------------------------------------------------------------- /examples/chat/npm/.npmignore: -------------------------------------------------------------------------------- 1 | # Development files 2 | .git 3 | .github 4 | .gitignore 5 | .vscode 6 | .idea 7 | .DS_Store 8 | 9 | # Test files 10 | test 11 | tests 12 | __tests__ 13 | *.test.js 14 | *.spec.js 15 | 16 | # Documentation 17 | docs 18 | doc 19 | *.md 20 | !README.md 21 | 22 | # Build files 23 | .travis.yml 24 | .gitlab-ci.yml 25 | .github 26 | .circleci 27 | 28 | # Logs 29 | logs 30 | *.log 31 | npm-debug.log* 32 | yarn-debug.log* 33 | yarn-error.log* 34 | 35 | # Runtime data 36 | pids 37 | *.pid 38 | *.seed 39 | *.pid.lock 40 | 41 | # Coverage directory used by tools like istanbul 42 | coverage 43 | .nyc_output 44 | 45 | 
# Dependency directories 46 | node_modules 47 | 48 | # Optional npm cache directory 49 | .npm 50 | 51 | # Optional eslint cache 52 | .eslintcache 53 | 54 | # Optional REPL history 55 | .node_repl_history 56 | 57 | # Output of 'npm pack' 58 | *.tgz 59 | 60 | # dotenv environment variable files 61 | .env 62 | .env.local 63 | .env.development.local 64 | .env.test.local 65 | .env.production.local 66 | 67 | # Temporary files 68 | tmp 69 | temp -------------------------------------------------------------------------------- /npm/tests/unit/system-prompt.test.js: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from '@jest/globals'; 2 | import { ProbeAgent } from '../../src/agent/ProbeAgent.js'; 3 | 4 | describe('ProbeAgent systemPrompt alias', () => { 5 | test('uses systemPrompt when provided', () => { 6 | const agent = new ProbeAgent({ 7 | path: process.cwd(), 8 | systemPrompt: 'system-level prompt' 9 | }); 10 | 11 | expect(agent.customPrompt).toBe('system-level prompt'); 12 | }); 13 | 14 | test('systemPrompt takes precedence over customPrompt', () => { 15 | const agent = new ProbeAgent({ 16 | path: process.cwd(), 17 | systemPrompt: 'primary system prompt', 18 | customPrompt: 'secondary custom prompt' 19 | }); 20 | 21 | expect(agent.customPrompt).toBe('primary system prompt'); 22 | }); 23 | 24 | test('falls back to customPrompt when systemPrompt is absent', () => { 25 | const agent = new ProbeAgent({ 26 | path: process.cwd(), 27 | customPrompt: 'custom prompt only' 28 | }); 29 | 30 | expect(agent.customPrompt).toBe('custom prompt only'); 31 | }); 32 | }); 33 | -------------------------------------------------------------------------------- /result2.txt: -------------------------------------------------------------------------------- 1 | Pattern: yaml workflow agent multi-agent user input 2 | Path: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties 3 | 
Options: Reranker: bm25, No block merging 4 | Using BM25 ranking (Okapi BM25 algorithm) 5 | Search completed in 30.19ms 6 | 7 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 8 | Lines: 1-1 9 | ```cs 10 | // Copyright (c) Microsoft. All rights reserved. 11 | ``` 12 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 13 | Lines: 3-3 14 | ```cs 15 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] 16 | ``` 17 | File: /Users/leonidbugaev/go/src/semantic-kernel/dotnet/src/VectorData/VectorData.Abstractions/Properties/AssemblyInfo.cs 18 | Lines: 1-3 19 | ```cs 20 | // Copyright (c) Microsoft. All rights reserved. 21 | 22 | [assembly: System.Resources.NeutralResourcesLanguage("en-US")] 23 | ``` 24 | Found 3 search results 25 | Total bytes returned: 228 26 | Total tokens returned: 64 27 | -------------------------------------------------------------------------------- /examples/chat/test-simple-tracing.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Simple test to check if tracing works 4 | async function testSimpleTracing() { 5 | console.log('Testing simple tracing...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with debug enabled 9 | const probeChat = new ProbeChat({ 10 | debug: true 11 | }); 12 | 13 | // Test just the extractImageUrls function directly 14 | const message = 'Here is an image: https://github.com/user-attachments/assets/example.png'; 15 | 16 | console.log('🔍 Testing extractImageUrls function...'); 17 | 18 | // Import the function to test it directly 19 | const { extractImageUrls } = await import('./probeChat.js'); 20 | 21 | // This should create a span 22 | const result = await extractImageUrls(message, true); 23 | 24 | console.log('✅ extractImageUrls result:', result); 25 | console.log('🎉 Test 
completed!'); 26 | 27 | } catch (error) { 28 | console.error('❌ Test failed:', error.message); 29 | } 30 | } 31 | 32 | testSimpleTracing().catch(console.error); -------------------------------------------------------------------------------- /npm/scripts/build-mcp.cjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const fs = require('fs-extra'); 4 | const path = require('path'); 5 | const { execSync } = require('child_process'); 6 | 7 | async function buildMcp() { 8 | try { 9 | console.log('Building MCP TypeScript...'); 10 | 11 | // Ensure build directory exists 12 | await fs.ensureDir('build'); 13 | 14 | // Copy src files to build directory 15 | console.log('Copying source files...'); 16 | await fs.copy('src', 'build', { 17 | overwrite: true, 18 | errorOnExist: false 19 | }); 20 | 21 | // Run TypeScript compiler 22 | console.log('Compiling TypeScript...'); 23 | execSync('tsc src/mcp/index.ts --outDir build/mcp --module esnext --target es2020 --moduleResolution node --esModuleInterop --allowSyntheticDefaultImports --skipLibCheck', { 24 | stdio: 'inherit', 25 | cwd: process.cwd() 26 | }); 27 | 28 | console.log('✅ MCP build completed successfully'); 29 | 30 | } catch (error) { 31 | console.error('❌ MCP build failed:', error.message); 32 | process.exit(1); 33 | } 34 | } 35 | 36 | buildMcp(); -------------------------------------------------------------------------------- /.github/workflows/visor.yml: -------------------------------------------------------------------------------- 1 | name: Visor Code Review 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | issues: 7 | types: [opened] 8 | issue_comment: 9 | types: [created] 10 | 11 | permissions: 12 | contents: read 13 | pull-requests: write 14 | issues: write 15 | checks: write 16 | 17 | jobs: 18 | code-review: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout code 22 | uses: actions/checkout@v4 23 | - uses: 
probelabs/visor@main 24 | with: 25 | app-id: ${{ secrets.APP_ID }} 26 | private-key: ${{ secrets.APP_PRIVATE_KEY }} 27 | installation-id: ${{ secrets.APP_INSTALLATION_ID }} 28 | debug: 'true' 29 | env: 30 | # AI Provider API Keys (configure one of these in your repository secrets) 31 | # GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} 32 | ANTHROPIC_API_KEY: ${{ secrets.GLM_API_KEY }} 33 | ANTHROPIC_API_URL: 'https://api.z.ai/api/anthropic/v1' 34 | # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 35 | # Optional: Specify the AI model to use 36 | MODEL_NAME: 'glm-4.6' 37 | -------------------------------------------------------------------------------- /npm/jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('jest').Config} */ 2 | export default { 3 | transform: {}, 4 | 5 | // Test environment 6 | testEnvironment: 'node', 7 | 8 | // Test file patterns - run stable test files 9 | testMatch: [ 10 | '**/tests/**/*.test.js', 11 | '**/src/agent/acp/tools.test.js', 12 | '**/src/agent/acp/connection.test.js', 13 | '**/src/agent/acp/types.test.js' 14 | ], 15 | 16 | // Coverage configuration 17 | collectCoverageFrom: [ 18 | 'src/**/*.js', 19 | '!src/**/*.test.js', 20 | '!src/test-*.js', 21 | '!**/node_modules/**', 22 | '!**/build/**', 23 | '!**/dist/**' 24 | ], 25 | 26 | // Coverage thresholds 27 | coverageThreshold: { 28 | global: { 29 | branches: 70, 30 | functions: 70, 31 | lines: 70, 32 | statements: 70 33 | } 34 | }, 35 | 36 | // Coverage reporters 37 | coverageReporters: [ 38 | 'text', 39 | 'lcov', 40 | 'html' 41 | ], 42 | 43 | // Setup files 44 | setupFilesAfterEnv: ['/tests/setup.js'], 45 | 46 | 47 | // Verbose output 48 | verbose: true, 49 | 50 | // Timeout for tests 51 | testTimeout: 10000 52 | }; -------------------------------------------------------------------------------- /npm/tests/unit/backtickAutoFix.test.js: -------------------------------------------------------------------------------- 1 | 
import { validateAndFixMermaidResponse } from '../../src/agent/schemaUtils.js'; 2 | 3 | describe('Mermaid Auto-Fix - Backticks', () => { 4 | const mockOptions = { 5 | debug: false, 6 | path: '/test/path', 7 | provider: 'anthropic', 8 | model: 'claude-3-sonnet-20240229' 9 | }; 10 | 11 | describe('Auto-fix backticks in node labels', () => { 12 | 13 | 14 | 15 | 16 | 17 | test('should remove backticks from quoted labels', async () => { 18 | const response = `\`\`\`mermaid 19 | flowchart TD 20 | A["Already quoted \`backticks\`"] --> B{"Also quoted \`here\`"} 21 | \`\`\``; 22 | 23 | const result = await validateAndFixMermaidResponse(response, mockOptions); 24 | 25 | // @probelabs/maid v0.0.15+ treats backticks inside quoted labels as errors (FL-LABEL-BACKTICK) 26 | // and removes them during auto-fix. This is the expected behavior. 27 | expect(result.fixedResponse).toContain('A["Already quoted backticks"]'); 28 | expect(result.fixedResponse).toContain('B{"Also quoted here"}'); 29 | }); 30 | 31 | }); 32 | 33 | describe('Validation detects backticks correctly', () => { 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/BlogPostLayout.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 37 | 38 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/FeatureSection.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | -------------------------------------------------------------------------------- /src/version.rs: -------------------------------------------------------------------------------- 1 | //! Version utilities for probe 2 | //! 3 | //! This module provides utilities for getting version information at runtime. 
4 | 5 | /// Get the version string from Cargo.toml 6 | pub fn get_version() -> &'static str { 7 | env!("CARGO_PKG_VERSION") 8 | } 9 | 10 | /// Get the package name from Cargo.toml 11 | pub fn get_package_name() -> &'static str { 12 | env!("CARGO_PKG_NAME") 13 | } 14 | 15 | /// Get a formatted version string with package name 16 | pub fn get_version_info() -> String { 17 | format!("{} {}", get_package_name(), get_version()) 18 | } 19 | 20 | #[cfg(test)] 21 | mod tests { 22 | use super::*; 23 | 24 | #[test] 25 | fn test_get_version() { 26 | let version = get_version(); 27 | assert!(!version.is_empty()); 28 | // Should follow semantic versioning pattern 29 | assert!(version.contains('.')); 30 | } 31 | 32 | #[test] 33 | fn test_get_package_name() { 34 | let name = get_package_name(); 35 | assert_eq!(name, "probe-code"); 36 | } 37 | 38 | #[test] 39 | fn test_get_version_info() { 40 | let info = get_version_info(); 41 | assert!(info.contains("probe-code")); 42 | assert!(info.contains('.')); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /tests/test_tokenize.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | // Import the tokenize function from our probe crate 3 | use probe_code::ranking::tokenize; 4 | 5 | // Test strings 6 | let test_strings = ["The quick brown fox jumps over the lazy dog", 7 | "function calculateTotal(items) { return items.reduce((sum, item) => sum + item.price, 0); }", 8 | "class UserController extends BaseController implements UserInterface", 9 | "Searching for files containing important information", 10 | "Fruitlessly searching for the missing variable in the codebase"]; 11 | 12 | println!("Testing tokenization with stop word removal and stemming:\n"); 13 | 14 | for (i, test_str) in test_strings.iter().enumerate() { 15 | println!("Original text {}:\n{}", i + 1, test_str); 16 | 17 | // Tokenize with stop word removal and stemming 18 | let tokens 
= tokenize(test_str); 19 | 20 | println!("Tokens after stop word removal and stemming:"); 21 | println!("{tokens:?}"); 22 | println!("Number of tokens: {}\n", tokens.len()); 23 | } 24 | 25 | // Specific test for stemming 26 | println!("Specific stemming test:"); 27 | println!("'fruitlessly' stems to: {}", tokenize("fruitlessly")[0]); 28 | } 29 | -------------------------------------------------------------------------------- /tests/mocks/test_struct.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | // Person represents a person with various attributes 6 | type Person struct { 7 | Name string 8 | Age int 9 | Email string 10 | PhoneNumber string 11 | Address Address 12 | } 13 | 14 | // Address represents a physical address 15 | type Address struct { 16 | Street string 17 | City string 18 | State string 19 | ZipCode string 20 | Country string 21 | } 22 | 23 | func main() { 24 | // Create a new person 25 | person := Person{ 26 | Name: "John Doe", 27 | Age: 30, 28 | Email: "john.doe@example.com", 29 | PhoneNumber: "555-1234", 30 | Address: Address{ 31 | Street: "123 Main St", 32 | City: "Anytown", 33 | State: "CA", 34 | ZipCode: "12345", 35 | Country: "USA", 36 | }, 37 | } 38 | 39 | // Print the person's information 40 | fmt.Printf("Name: %s\n", person.Name) 41 | fmt.Printf("Age: %d\n", person.Age) 42 | fmt.Printf("Email: %s\n", person.Email) 43 | fmt.Printf("Phone: %s\n", person.PhoneNumber) 44 | fmt.Printf("Address: %s, %s, %s %s, %s\n", 45 | person.Address.Street, 46 | person.Address.City, 47 | person.Address.State, 48 | person.Address.ZipCode, 49 | person.Address.Country) 50 | } 51 | -------------------------------------------------------------------------------- /npm/tests/nestedQuoteFix.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Nested Quote Fix Tests 3 | * 4 | * NOTE: Most tests in this file have been skipped for maid 0.0.6 
integration. 5 | * These tests check OLD regex-based HTML entity handling and quote fixing behavior: 6 | * - Converting ' to ' 7 | * - Automatic quote wrapping with escaped inner quotes 8 | * - Specific HTML entity normalization 9 | * 10 | * Maid handles HTML entities and quotes differently using proper parsing. 11 | * Tests marked with .skip check OLD behavior that maid doesn't replicate. 12 | */ 13 | 14 | import { validateAndFixMermaidResponse } from '../src/agent/schemaUtils.js'; 15 | 16 | 17 | 18 | test('should not double-encode already encoded entities', async () => { 19 | const preEncodedDiagram = ` 20 | \`\`\`mermaid 21 | graph TD 22 | A[Text with 'single' quotes] 23 | B[Text with "double" quotes] 24 | \`\`\` 25 | `; 26 | 27 | const result = await validateAndFixMermaidResponse(preEncodedDiagram, { 28 | autoFix: true, 29 | debug: false 30 | }); 31 | 32 | expect(result.isValid).toBe(true); 33 | 34 | // Should not double-encode 35 | expect(result.fixedResponse).not.toContain('&#39;'); 36 | expect(result.fixedResponse).not.toContain('&quot;'); 37 | }); 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/language/mod.rs: -------------------------------------------------------------------------------- 1 | // Language module - provides functionality for parsing different programming languages 2 | // using tree-sitter and extracting code blocks. 
3 | 4 | // Import submodules 5 | pub mod block_handling; 6 | pub mod common; 7 | pub mod factory; 8 | pub mod language_trait; 9 | pub mod parser; 10 | pub mod parser_pool; 11 | pub mod test_detection; 12 | pub mod tree_cache; 13 | 14 | // Language implementations 15 | pub mod c; 16 | pub mod cpp; 17 | pub mod csharp; 18 | pub mod go; 19 | pub mod html; 20 | pub mod java; 21 | pub mod javascript; 22 | pub mod markdown; 23 | pub mod php; 24 | pub mod python; 25 | pub mod ruby; 26 | pub mod rust; 27 | pub mod swift; 28 | pub mod typescript; 29 | pub mod yaml; 30 | 31 | // Re-export items for backward compatibility 32 | pub use parser::{parse_file_for_code_blocks, parse_file_for_code_blocks_with_tree}; 33 | pub use parser_pool::{clear_parser_pool, get_pool_stats, get_pooled_parser, return_pooled_parser}; 34 | pub use test_detection::is_test_file; 35 | #[allow(unused_imports)] 36 | pub use tree_cache::{ 37 | clear_tree_cache, get_cache_size, get_or_parse_tree_pooled, invalidate_cache_entry, 38 | }; 39 | 40 | #[cfg(test)] 41 | mod tests; 42 | 43 | #[cfg(test)] 44 | mod javascript_specific_tests; 45 | 46 | #[cfg(test)] 47 | mod typescript_specific_tests; 48 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/CommandExample.vue: -------------------------------------------------------------------------------- 1 | 12 | 13 | 21 | 22 | -------------------------------------------------------------------------------- /.claude/commands/performance-review.md: -------------------------------------------------------------------------------- 1 | You goal is to measure and improve performance. 
2 | 3 | First run `cargo build --release` and remember the current performance: DEBUG=1 ./target/release/probe search "yaml workflow agent multi-agent user input" ~/go/src/semantic-kernel/ --max-tokens 10000 2>/dev/null | sed -n '/=== SEARCH TIMING INFORMATION ===/,/====================================/p' 4 | 5 | Print it to the user. 6 | 7 | Now that you have a baseline, find all the steps which take more than 1 second, and run the separate @architecture-agent for each, to plan if we can significantly improve performance. For each suggestion measure confidence. If confidence is high, add it to the detailed plan; if not, say that it is already performant enough. 8 | 9 | Once you went through all the steps and built a solid plan, I want you to start implementing it in a separate agent. 10 | But always explicitly ask the user before each next implementation. 11 | 12 | Each change should be measured, and compared with our baseline. You can add more debugging to search timing information, or make it more detailed if needed. 13 | Once each change is implemented, it should be committed as a separate commit. 14 | 15 | We do care about backward compatibility, and about deterministic outputs as well. Be careful. Validate each change by re-running all the tests. 16 | -------------------------------------------------------------------------------- /src/search/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod file_processing; 2 | pub mod query; 3 | mod result_ranking; 4 | // Replace the old search_execution with new modules 5 | pub mod block_merging; 6 | pub mod cache; // New module for caching search results 7 | pub mod early_ranker; // New module for early BM25 ranking 8 | pub mod elastic_query; 9 | pub mod file_list_cache; // New module for caching file lists 10 | pub mod filters; // New module for search filters (file:, ext:, type:, etc.)
11 | pub mod ripgrep_searcher; 12 | mod search_limiter; 13 | mod search_options; 14 | pub mod search_output; 15 | pub mod search_runner; 16 | pub mod search_tokens; 17 | pub mod simd_pattern_matching; 18 | pub mod simd_tokenization; // SIMD-accelerated tokenization 19 | pub mod term_exceptions; // New module for term exceptions 20 | pub mod timeout; // New module for timeout functionality 21 | pub mod tokenization; // New elastic search query parser 22 | // Temporarily commented out due to compilation issues 23 | // mod temp_frequency_search; 24 | 25 | #[cfg(test)] 26 | mod file_processing_tests; 27 | 28 | #[cfg(test)] 29 | mod test_token_limiter_failures; 30 | 31 | // Public exports 32 | pub use search_options::SearchOptions; 33 | pub use search_output::format_and_print_search_results; 34 | pub use search_runner::perform_probe; 35 | -------------------------------------------------------------------------------- /site/.vitepress/theme/components/FeatureList.vue: -------------------------------------------------------------------------------- 1 | 14 | 15 | 22 | 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use distroless for minimal attack surface and smaller image 2 | FROM gcr.io/distroless/cc-debian12 3 | 4 | # Build arguments for metadata 5 | ARG VERSION=dev 6 | ARG BUILD_DATE 7 | ARG VCS_REF 8 | ARG TARGETARCH 9 | 10 | # Add security and metadata labels 11 | LABEL maintainer="Probe Team" \ 12 | description="Probe - Code search tool" \ 13 | version="${VERSION}" \ 14 | org.opencontainers.image.created="${BUILD_DATE}" \ 15 | org.opencontainers.image.source="https://github.com/probelabs/probe" \ 16 | org.opencontainers.image.revision="${VCS_REF}" \ 17 | org.opencontainers.image.version="${VERSION}" \ 18 | org.opencontainers.image.title="Probe" \ 19 | org.opencontainers.image.description="AI-friendly code search tool built in 
Rust" 20 | 21 | # Distroless images run as non-root by default and include CA certificates 22 | 23 | # Copy the pre-built binary based on target architecture 24 | # TARGETARCH is automatically provided by Docker buildx (amd64, arm64) 25 | COPY binaries/${TARGETARCH}/probe /usr/local/bin/probe 26 | 27 | # Health check using the binary (distroless runs as non-root by default) 28 | HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ 29 | CMD ["/usr/local/bin/probe", "--version"] 30 | 31 | # Set the default command 32 | ENTRYPOINT ["/usr/local/bin/probe"] -------------------------------------------------------------------------------- /examples/chat/test-image-spans.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Test image extraction with OpenTelemetry spans 4 | async function testImageExtraction() { 5 | console.log('Testing image extraction with OpenTelemetry spans...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with no API keys mode 9 | const probeChat = new ProbeChat({ 10 | debug: true, 11 | noApiKeysMode: true 12 | }); 13 | 14 | // Test message with images 15 | const testMessage = ` 16 | Here are some images: 17 | - GitHub asset: https://github.com/user-attachments/assets/example.png 18 | - Private image: https://private-user-images.githubusercontent.com/123/example.jpg 19 | - Regular image: https://example.com/photo.jpeg 20 | 21 | And some text without images. 22 | `; 23 | 24 | console.log('🔍 Testing chat with images (no API keys mode)...'); 25 | const result = await probeChat.chat(testMessage); 26 | console.log('✅ Chat completed successfully'); 27 | console.log('📄 Response:', result.response.substring(0, 100) + '...'); 28 | 29 | // Test completed 30 | console.log('\n🎉 Test completed! 
Check test-image-spans.jsonl for trace data.'); 31 | 32 | } catch (error) { 33 | console.error('❌ Test failed:', error.message); 34 | } 35 | } 36 | 37 | testImageExtraction().catch(console.error); -------------------------------------------------------------------------------- /npm/test-grep.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { grep } from './src/index.js'; 4 | 5 | async function testGrep() { 6 | console.log('Testing grep functionality...\n'); 7 | 8 | try { 9 | // Test 1: Basic search 10 | console.log('Test 1: Basic search for "TODO" in src directory'); 11 | const result1 = await grep({ 12 | pattern: 'TODO', 13 | paths: './src', 14 | lineNumbers: true 15 | }); 16 | console.log('Result:'); 17 | console.log(result1); 18 | console.log('\n---\n'); 19 | 20 | // Test 2: Case-insensitive search with count 21 | console.log('Test 2: Count "function" occurrences (case-insensitive)'); 22 | const result2 = await grep({ 23 | pattern: 'function', 24 | paths: './src', 25 | ignoreCase: true, 26 | count: true 27 | }); 28 | console.log('Result:'); 29 | console.log(result2); 30 | console.log('\n---\n'); 31 | 32 | // Test 3: Files with matches 33 | console.log('Test 3: Files containing "export"'); 34 | const result3 = await grep({ 35 | pattern: 'export', 36 | paths: './src', 37 | filesWithMatches: true 38 | }); 39 | console.log('Result:'); 40 | console.log(result3); 41 | console.log('\n---\n'); 42 | 43 | console.log('✅ All grep tests passed!'); 44 | } catch (error) { 45 | console.error('❌ Test failed:', error.message); 46 | console.error(error); 47 | process.exit(1); 48 | } 49 | } 50 | 51 | testGrep(); 52 | -------------------------------------------------------------------------------- /examples/chat/npm/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@probelabs/probe-chat", 3 | "version": "1.0.0", 4 | "description": "CLI chat 
interface for Probe code search", 5 | "main": "index.js", 6 | "type": "module", 7 | "bin": { 8 | "probe-chat": "./bin/probe-chat.js" 9 | }, 10 | "scripts": { 11 | "test": "echo \"Error: no test specified\" && exit 1", 12 | "prepublishOnly": "chmod +x ./bin/probe-chat.js" 13 | }, 14 | "keywords": [ 15 | "probe", 16 | "code-search", 17 | "chat", 18 | "ai", 19 | "cli" 20 | ], 21 | "author": "Leonid Bugaev", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@ai-sdk/anthropic": "^0.0.9", 25 | "@ai-sdk/openai": "^0.0.9", 26 | "@probelabs/probe": "*", 27 | "ai": "^4.1.41", 28 | "chalk": "^5.3.0", 29 | "commander": "^11.1.0", 30 | "dotenv": "^16.3.1", 31 | "inquirer": "^9.2.12", 32 | "ora": "^7.0.1" 33 | }, 34 | "engines": { 35 | "node": ">=18.0.0" 36 | }, 37 | "repository": { 38 | "type": "git", 39 | "url": "git+https://github.com/probelabs/probe.git" 40 | }, 41 | "bugs": { 42 | "url": "https://github.com/probelabs/probe/issues" 43 | }, 44 | "homepage": "https://github.com/probelabs/probe#readme", 45 | "publishConfig": { 46 | "access": "public" 47 | }, 48 | "files": [ 49 | "bin/", 50 | "index.js", 51 | "README.md", 52 | "LICENSE" 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /src/language/language_trait.rs: -------------------------------------------------------------------------------- 1 | use tree_sitter::{Language as TSLanguage, Node}; 2 | 3 | /// Trait that defines the interface for all language implementations. 
4 | pub trait LanguageImpl { 5 | /// Get the tree-sitter language for parsing 6 | fn get_tree_sitter_language(&self) -> TSLanguage; 7 | 8 | /// Check if a node is an acceptable container/parent entity 9 | fn is_acceptable_parent(&self, node: &Node) -> bool; 10 | 11 | /// Check if a node represents a test 12 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool; 13 | 14 | /// Get the file extension for this language 15 | #[deprecated(since = "0.1.0", note = "this method is not used")] 16 | #[allow(dead_code)] 17 | fn get_extension(&self) -> &'static str; 18 | 19 | /// Find the parent function or method declaration for a node (if any) 20 | fn find_parent_function<'a>(&self, _node: Node<'a>) -> Option> { 21 | // Default implementation returns None 22 | None 23 | } 24 | 25 | /// Extract the symbol signature without implementation body 26 | /// Returns a clean signature for functions, structs, classes, methods, constants, etc. 27 | fn get_symbol_signature(&self, _node: &Node, _source: &[u8]) -> Option { 28 | // Default implementation returns None 29 | // Each language should implement this to extract clean signatures 30 | None 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /examples/chat/test-chat-tracing.js: -------------------------------------------------------------------------------- 1 | import { ProbeChat } from './probeChat.js'; 2 | 3 | // Test chat function tracing 4 | async function testChatTracing() { 5 | console.log('Testing chat tracing...\n'); 6 | 7 | try { 8 | // Create a ProbeChat instance with debug enabled 9 | const probeChat = new ProbeChat({ 10 | debug: true, 11 | noApiKeysMode: true 12 | }); 13 | 14 | // Test message with images 15 | const testMessage = 'Here is an image: https://github.com/user-attachments/assets/example.png and some text.'; 16 | 17 | console.log('🔍 Testing chat function with tracing...'); 18 | console.log('Message:', testMessage); 19 | 20 | // Call the chat function - this 
should create spans 21 | const result = await probeChat.chat(testMessage); 22 | 23 | console.log('✅ Chat completed successfully'); 24 | console.log('📄 Response length:', result.response.length); 25 | console.log('📄 Response preview:', result.response.substring(0, 100) + '...'); 26 | 27 | console.log('🎉 Test completed! Check simple-traces.jsonl for trace data.'); 28 | 29 | // Wait a bit for telemetry to flush 30 | console.log('⏳ Waiting for telemetry to flush...'); 31 | await new Promise(resolve => setTimeout(resolve, 2000)); 32 | 33 | } catch (error) { 34 | console.error('❌ Test failed:', error.message); 35 | } 36 | } 37 | 38 | testChatTracing().catch(console.error); -------------------------------------------------------------------------------- /npm/tests/setup.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Jest setup file 3 | * This file runs before all tests to set up the testing environment 4 | */ 5 | import { jest, beforeEach, afterEach } from '@jest/globals'; 6 | import fs from 'fs'; 7 | import path from 'path'; 8 | 9 | // Set environment to test 10 | process.env.NODE_ENV = 'test'; 11 | 12 | // Disable debug logging during tests unless explicitly enabled 13 | if (!process.env.TEST_DEBUG) { 14 | process.env.DEBUG = ''; 15 | } 16 | 17 | // Prefer local binary in repository to avoid network during tests 18 | try { 19 | const isWin = process.platform === 'win32'; 20 | const binDir = path.resolve(__dirname, '..', 'bin'); 21 | const candidate = path.join(binDir, isWin ? 
'probe.exe' : 'probe-binary'); 22 | if (fs.existsSync(candidate)) { 23 | process.env.PROBE_PATH = candidate; 24 | } 25 | } catch {} 26 | 27 | // Global test timeout (can be overridden per test) 28 | jest.setTimeout(10000); 29 | 30 | // Mock console methods to avoid cluttering test output 31 | const originalConsole = { ...console }; 32 | beforeEach(() => { 33 | if (!process.env.TEST_VERBOSE) { 34 | console.log = jest.fn(); 35 | console.error = jest.fn(); 36 | console.warn = jest.fn(); 37 | } 38 | }); 39 | 40 | afterEach(() => { 41 | if (!process.env.TEST_VERBOSE) { 42 | console.log = originalConsole.log; 43 | console.error = originalConsole.error; 44 | console.warn = originalConsole.warn; 45 | } 46 | }); 47 | -------------------------------------------------------------------------------- /npm/tests/unit/types-probe-agent-options.test.js: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from '@jest/globals'; 2 | import ts from 'typescript'; 3 | 4 | /** 5 | * Regression test: ensure the public TypeScript surface exposes tool filtering 6 | * and system prompt options. We compile a tiny snippet and assert no diagnostics. 
7 | */ 8 | describe('Type definitions: ProbeAgentOptions', () => { 9 | const compile = (source) => { 10 | const result = ts.transpileModule(source, { 11 | compilerOptions: { 12 | target: ts.ScriptTarget.ES2020, 13 | module: ts.ModuleKind.ESNext, 14 | moduleResolution: ts.ModuleResolutionKind.Node16, 15 | strict: true, 16 | skipLibCheck: true, 17 | isolatedModules: true, 18 | allowImportingTsExtensions: true, 19 | types: [], 20 | } 21 | }); 22 | return result.diagnostics || []; 23 | }; 24 | 25 | test('accepts systemPrompt, allowedTools, and disableTools', () => { 26 | const diagnostics = compile(` 27 | import { ProbeAgent, type ProbeAgentOptions } from '../..'; 28 | 29 | const options: ProbeAgentOptions = { 30 | systemPrompt: 'hello', 31 | customPrompt: 'fallback', 32 | allowedTools: ['search', '!bash'], 33 | disableTools: false, 34 | }; 35 | 36 | const agent = new ProbeAgent(options); 37 | void agent; 38 | `); 39 | 40 | expect(diagnostics.length).toBe(0); 41 | }); 42 | }); 43 | -------------------------------------------------------------------------------- /site/.vitepress/theme/index.js: -------------------------------------------------------------------------------- 1 | import { h } from 'vue' 2 | import DefaultTheme from 'vitepress/theme' 3 | import './custom.css' 4 | import './home.css' 5 | import './blog.css' 6 | import FeatureList from './components/FeatureList.vue' 7 | import CodeEditor from './components/CodeEditor.vue' 8 | import CommandExample from './components/CommandExample.vue' 9 | import BlogPostLayout from './components/BlogPostLayout.vue' 10 | import BlogLayout from './layouts/BlogLayout.vue' 11 | import FeatureSection from '../components/FeatureSection.vue' 12 | import SimpleFeatureSection from '../components/SimpleFeatureSection.vue' 13 | import StarsBackground from '../components/StarsBackground.vue' 14 | import HomeFeatures from '../components/HomeFeatures.vue' 15 | 16 | export default { 17 | ...DefaultTheme, 18 | Layout() { 19 | return 
h(DefaultTheme.Layout, null, { 20 | 'home-features-after': () => h(FeatureList) 21 | }); 22 | }, 23 | enhanceApp({ app }) { 24 | // Register global components 25 | app.component('FeatureList', FeatureList) 26 | app.component('CodeEditor', CodeEditor) 27 | app.component('CommandExample', CommandExample) 28 | app.component('BlogPostLayout', BlogPostLayout) 29 | app.component('BlogLayout', BlogLayout) 30 | app.component('FeatureSection', FeatureSection) 31 | app.component('SimpleFeatureSection', SimpleFeatureSection) 32 | app.component('StarsBackground', StarsBackground) 33 | app.component('HomeFeatures', HomeFeatures) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /npm/src/cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * CLI wrapper for the probe binary 5 | * 6 | * This script ensures the probe binary is downloaded and then executes it with the provided arguments. 7 | * It's designed to be as lightweight as possible, essentially just passing through to the actual binary. 
8 | */ 9 | 10 | import { spawn } from 'child_process'; 11 | import { getBinaryPath } from './utils.js'; 12 | 13 | /** 14 | * Main function 15 | */ 16 | async function main() { 17 | try { 18 | // Get the path to the probe binary (this will download it if needed) 19 | const binaryPath = await getBinaryPath(); 20 | 21 | // Get the arguments passed to the CLI 22 | const args = process.argv.slice(2); 23 | 24 | // Spawn the probe binary with the provided arguments 25 | const probeProcess = spawn(binaryPath, args, { 26 | stdio: 'inherit' // Pipe stdin/stdout/stderr to the parent process 27 | }); 28 | 29 | // Handle process exit 30 | probeProcess.on('close', (code) => { 31 | process.exit(code); 32 | }); 33 | 34 | // Handle process errors 35 | probeProcess.on('error', (error) => { 36 | console.error(`Error executing probe binary: ${error.message}`); 37 | process.exit(1); 38 | }); 39 | } catch (error) { 40 | console.error(`Error: ${error.message}`); 41 | process.exit(1); 42 | } 43 | } 44 | 45 | // Execute the main function 46 | main().catch(error => { 47 | console.error(`Unexpected error: ${error.message}`); 48 | process.exit(1); 49 | }); -------------------------------------------------------------------------------- /site/.vitepress/components/FullWidthFeatureSection.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 13 | 14 | -------------------------------------------------------------------------------- /npm/src/agent/shared/Session.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Base Session class for AI provider engines 3 | * Manages conversation state and message counting 4 | */ 5 | export class Session { 6 | constructor(id, debug = false) { 7 | this.id = id; 8 | this.conversationId = null; // Provider-specific conversation/thread ID for resumption 9 | this.messageCount = 0; 10 | this.debug = debug; 11 | } 12 | 13 | /** 14 | * Set the conversation ID for session 
resumption 15 | * @param {string} conversationId - Provider's conversation/thread ID 16 | */ 17 | setConversationId(conversationId) { 18 | this.conversationId = conversationId; 19 | if (this.debug) { 20 | console.log(`[Session ${this.id}] Conversation ID: ${conversationId}`); 21 | } 22 | } 23 | 24 | /** 25 | * Increment the message count 26 | */ 27 | incrementMessageCount() { 28 | this.messageCount++; 29 | } 30 | 31 | /** 32 | * Get session info as plain object 33 | * @returns {Object} Session information 34 | */ 35 | getInfo() { 36 | return { 37 | id: this.id, 38 | conversationId: this.conversationId, 39 | messageCount: this.messageCount 40 | }; 41 | } 42 | 43 | /** 44 | * Get resume arguments for CLI commands (used by Claude Code) 45 | * @returns {Array} CLI arguments for resuming conversation 46 | */ 47 | getResumeArgs() { 48 | if (this.conversationId && this.messageCount > 0) { 49 | return ['--resume', this.conversationId]; 50 | } 51 | return []; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/chat/test-direct-function.js: -------------------------------------------------------------------------------- 1 | // Test direct function call with telemetry 2 | import { TelemetryConfig } from './telemetry.js'; 3 | import { trace } from '@opentelemetry/api'; 4 | 5 | // Initialize telemetry first 6 | const telemetryConfig = new TelemetryConfig({ 7 | enableFile: true, 8 | enableConsole: true, 9 | filePath: './direct-test-traces.jsonl' 10 | }); 11 | 12 | telemetryConfig.initialize(); 13 | 14 | // Test function with tracing 15 | function testFunction() { 16 | const tracer = trace.getTracer('direct-test'); 17 | return tracer.startActiveSpan('testFunction', (span) => { 18 | try { 19 | console.log('🔍 Inside test function with span'); 20 | 21 | span.setAttributes({ 22 | 'test.name': 'direct-function-test', 23 | 'test.timestamp': Date.now() 24 | }); 25 | 26 | const result = 'Test completed successfully'; 27 | 
span.setStatus({ code: 1 }); // SUCCESS 28 | return result; 29 | } catch (error) { 30 | span.recordException(error); 31 | span.setStatus({ code: 2, message: error.message }); 32 | throw error; 33 | } finally { 34 | span.end(); 35 | } 36 | }); 37 | } 38 | 39 | // Test the function 40 | console.log('Testing direct function call with telemetry...'); 41 | const result = testFunction(); 42 | console.log('✅ Result:', result); 43 | 44 | // Wait and shutdown 45 | setTimeout(async () => { 46 | console.log('⏳ Shutting down telemetry...'); 47 | await telemetryConfig.shutdown(); 48 | console.log('🎉 Test completed!'); 49 | }, 2000); -------------------------------------------------------------------------------- /npm/src/agent/storage/InMemoryStorageAdapter.js: -------------------------------------------------------------------------------- 1 | import { StorageAdapter } from './StorageAdapter.js'; 2 | 3 | /** 4 | * Default in-memory storage adapter 5 | * This is the default behavior - stores history in a Map in memory 6 | */ 7 | export class InMemoryStorageAdapter extends StorageAdapter { 8 | constructor() { 9 | super(); 10 | this.sessions = new Map(); // sessionId -> {messages: [], metadata: {}} 11 | } 12 | 13 | async loadHistory(sessionId) { 14 | const session = this.sessions.get(sessionId); 15 | return session ? session.messages : []; 16 | } 17 | 18 | async saveMessage(sessionId, message) { 19 | if (!this.sessions.has(sessionId)) { 20 | this.sessions.set(sessionId, { 21 | messages: [], 22 | metadata: { 23 | createdAt: new Date().toISOString(), 24 | lastActivity: new Date().toISOString() 25 | } 26 | }); 27 | } 28 | 29 | const session = this.sessions.get(sessionId); 30 | session.messages.push(message); 31 | session.metadata.lastActivity = new Date().toISOString(); 32 | } 33 | 34 | async clearHistory(sessionId) { 35 | this.sessions.delete(sessionId); 36 | } 37 | 38 | async getSessionMetadata(sessionId) { 39 | const session = this.sessions.get(sessionId); 40 | return session ? 
session.metadata : null; 41 | } 42 | 43 | async updateSessionActivity(sessionId) { 44 | const session = this.sessions.get(sessionId); 45 | if (session) { 46 | session.metadata.lastActivity = new Date().toISOString(); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /npm/src/agent/mockProvider.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Mock AI provider for testing purposes 3 | * This provider simulates AI responses without making actual API calls 4 | */ 5 | 6 | export function createMockProvider() { 7 | return { 8 | languageModel: (modelName) => ({ 9 | modelId: `mock-${modelName}`, 10 | provider: 'mock', 11 | 12 | // Mock the doGenerate method used by Vercel AI SDK 13 | doGenerate: async ({ messages, tools }) => { 14 | // Simulate processing time 15 | await new Promise(resolve => setTimeout(resolve, 10)); 16 | 17 | // Return a mock response 18 | return { 19 | text: 'This is a mock response for testing', 20 | toolCalls: [], 21 | usage: { 22 | promptTokens: 10, 23 | completionTokens: 5, 24 | totalTokens: 15 25 | } 26 | }; 27 | }, 28 | 29 | // Mock the doStream method for streaming responses 30 | doStream: async function* ({ messages, tools }) { 31 | // Simulate streaming response 32 | yield { 33 | type: 'text-delta', 34 | textDelta: 'Mock streaming response' 35 | }; 36 | 37 | yield { 38 | type: 'finish', 39 | usage: { 40 | promptTokens: 10, 41 | completionTokens: 5, 42 | totalTokens: 15 43 | } 44 | }; 45 | } 46 | }) 47 | }; 48 | } 49 | 50 | export function createMockModel(modelName = 'mock-model') { 51 | const provider = createMockProvider(); 52 | return provider.languageModel(modelName); 53 | } -------------------------------------------------------------------------------- /npm/bin/binaries/README.md: -------------------------------------------------------------------------------- 1 | # Bundled Probe Binaries 2 | 3 | This directory contains pre-compiled 
probe binaries for all supported platforms, bundled with the npm package to enable offline installation. 4 | 5 | ## Expected Files 6 | 7 | The CI/CD pipeline should place the following compressed binaries here before publishing to npm: 8 | 9 | - `probe-v{VERSION}-x86_64-unknown-linux-musl.tar.gz` - Linux x64 (static) 10 | - `probe-v{VERSION}-aarch64-unknown-linux-musl.tar.gz` - Linux ARM64 (static) 11 | - `probe-v{VERSION}-x86_64-apple-darwin.tar.gz` - macOS Intel 12 | - `probe-v{VERSION}-aarch64-apple-darwin.tar.gz` - macOS Apple Silicon 13 | - `probe-v{VERSION}-x86_64-pc-windows-msvc.zip` - Windows x64 14 | 15 | ## File Size 16 | 17 | Each compressed binary is approximately 5MB, totaling ~25MB for all 5 platforms. 18 | 19 | ## Installation Flow 20 | 21 | 1. **Postinstall script** (`scripts/postinstall.js`) detects the current platform 22 | 2. **Extraction** (`src/extractor.js`) extracts the matching bundled binary 23 | 3. **Fallback**: If no bundled binary is found, downloads from GitHub releases 24 | 25 | ## CI Integration 26 | 27 | The release workflow (`.github/workflows/release.yml`) should: 28 | 29 | 1. Build binaries for all 5 platforms 30 | 2. Create compressed archives (`.tar.gz` or `.zip`) 31 | 3. Copy them to `npm/bin/binaries/` before running `npm publish` 32 | 33 | Example CI step: 34 | ```yaml 35 | - name: Copy binaries to npm package 36 | run: | 37 | mkdir -p npm/bin/binaries 38 | cp dist/probe-v$VERSION-*.tar.gz npm/bin/binaries/ 39 | cp dist/probe-v$VERSION-*.zip npm/bin/binaries/ 40 | ``` 41 | -------------------------------------------------------------------------------- /.github/workflows/README-docker.md: -------------------------------------------------------------------------------- 1 | # Docker CI/CD Setup 2 | 3 | This document describes the Docker CI/CD setup for the Probe project. 4 | 5 | ## Required Secrets 6 | 7 | The following secrets need to be configured in your GitHub repository settings: 8 | 9 | 1. 
**`DOCKER_HUB_TOKEN`** - Docker Hub access token for pushing images 10 | - Create at: https://hub.docker.com/settings/security 11 | - Required permissions: Read, Write, Delete 12 | 13 | ## Optional Variables 14 | 15 | The following variables can be configured in repository settings: 16 | 17 | 1. **`DOCKER_HUB_USERNAME`** - Docker Hub username (defaults to 'buger') 18 | 19 | ## Workflow Integration 20 | 21 | ### release.yml 22 | The Docker build and publish process is integrated into the main release workflow: 23 | - Triggers on version tags (v*) 24 | - Builds multi-platform images (linux/amd64, linux/arm64) 25 | - Publishes versioned images to Docker Hub 26 | - Updates Docker Hub descriptions 27 | - Tags: `X.Y.Z` and `latest` 28 | 29 | The `publish-docker-images` job runs after the binary releases are complete, ensuring all release artifacts are available. 30 | 31 | ## Image Naming 32 | 33 | - Probe CLI: `buger/probe` 34 | - Probe Chat: `buger/probe-chat` 35 | 36 | ## Testing Locally 37 | 38 | ```bash 39 | # Test the full release workflow (including Docker builds) 40 | act -j publish-docker-images --secret DOCKER_HUB_TOKEN=your_token -e <(echo '{"ref": "refs/tags/v1.0.0"}') 41 | 42 | # Test Docker builds locally 43 | docker build -t probe-test . 44 | docker build -t probe-chat-test -f examples/chat/Dockerfile examples/chat 45 | 46 | # Test multi-platform builds locally 47 | docker buildx build --platform linux/amd64,linux/arm64 -t probe-test . 48 | ``` -------------------------------------------------------------------------------- /src/search/timeout.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicBool, Ordering}; 2 | use std::sync::Arc; 3 | use std::thread; 4 | use std::time::Duration; 5 | 6 | /// Starts a timeout thread that will terminate the process if the timeout is reached. 7 | /// Returns a handle to the timeout thread that can be used to stop it. 
8 | pub fn start_timeout_thread(timeout_seconds: u64) -> Arc { 9 | let should_stop = Arc::new(AtomicBool::new(false)); 10 | let should_stop_clone = should_stop.clone(); 11 | 12 | // For testing purposes, check if we're running in a test environment 13 | let is_test = std::env::var("RUST_TEST_THREADS").is_ok(); 14 | 15 | // Use a shorter sleep interval for tests to make timeouts more reliable 16 | let sleep_interval = if is_test { 17 | Duration::from_millis(10) // 100ms for tests 18 | } else { 19 | Duration::from_secs(1) // 1 second for normal operation 20 | }; 21 | 22 | thread::spawn(move || { 23 | let mut elapsed_time = Duration::from_secs(0); 24 | let timeout_duration = Duration::from_secs(timeout_seconds); 25 | 26 | while elapsed_time < timeout_duration { 27 | // Check if we should stop the timeout thread 28 | if should_stop_clone.load(Ordering::SeqCst) { 29 | return; 30 | } 31 | 32 | // Sleep for the interval 33 | thread::sleep(sleep_interval); 34 | elapsed_time += sleep_interval; 35 | } 36 | 37 | // Timeout reached, print a message and terminate the process 38 | eprintln!("Search operation timed out after {timeout_seconds} seconds"); 39 | std::process::exit(1); 40 | }); 41 | 42 | should_stop 43 | } 44 | -------------------------------------------------------------------------------- /npm/src/agent/mcp/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * MCP (Model Context Protocol) integration for ProbeAgent 3 | * 4 | * This module provides: 5 | * - MCP client management for connecting to MCP servers 6 | * - XML/JSON hybrid tool interface 7 | * - Configuration management 8 | */ 9 | 10 | // Re-export main classes and functions 11 | export { MCPClientManager, createMCPManager, createTransport } from './client.js'; 12 | export { 13 | loadMCPConfiguration, 14 | loadMCPConfigurationFromPath, 15 | parseEnabledServers, 16 | createSampleConfig, 17 | saveConfig 18 | } from './config.js'; 19 | export { 20 | 
MCPXmlBridge, 21 | mcpToolToXmlDefinition, 22 | parseXmlMcpToolCall, 23 | parseHybridXmlToolCall, 24 | createHybridSystemMessage 25 | } from './xmlBridge.js'; 26 | 27 | // Import for default export 28 | import { MCPClientManager, createMCPManager, createTransport } from './client.js'; 29 | import { 30 | loadMCPConfiguration, 31 | loadMCPConfigurationFromPath, 32 | parseEnabledServers, 33 | createSampleConfig, 34 | saveConfig 35 | } from './config.js'; 36 | import { 37 | MCPXmlBridge, 38 | mcpToolToXmlDefinition, 39 | parseXmlMcpToolCall, 40 | parseHybridXmlToolCall, 41 | createHybridSystemMessage 42 | } from './xmlBridge.js'; 43 | 44 | // Default export for convenience 45 | export default { 46 | // Client 47 | MCPClientManager, 48 | createMCPManager, 49 | createTransport, 50 | 51 | // Config 52 | loadMCPConfiguration, 53 | loadMCPConfigurationFromPath, 54 | parseEnabledServers, 55 | createSampleConfig, 56 | saveConfig, 57 | 58 | // XML Bridge 59 | MCPXmlBridge, 60 | mcpToolToXmlDefinition, 61 | parseXmlMcpToolCall, 62 | parseHybridXmlToolCall, 63 | createHybridSystemMessage 64 | }; -------------------------------------------------------------------------------- /examples/cache_demo.rs: -------------------------------------------------------------------------------- 1 | use probe_code::language::parser::parse_file_for_code_blocks; 2 | use std::collections::HashSet; 3 | 4 | fn main() { 5 | // Set up test content 6 | let content = r#" 7 | fn test_function() { 8 | // This is a comment 9 | let x = 42; 10 | println!("Hello, world!"); 11 | } 12 | 13 | struct TestStruct { 14 | field1: i32, 15 | field2: String, 16 | } 17 | "#; 18 | 19 | // Create a set of line numbers to extract 20 | let mut line_numbers = HashSet::new(); 21 | line_numbers.insert(3); // Comment line 22 | line_numbers.insert(4); // Code line 23 | line_numbers.insert(8); // Struct field line 24 | 25 | println!("First call (should be a cache miss):"); 26 | let result1 = parse_file_for_code_blocks(content, 
"rs", &line_numbers, true, None).unwrap(); 27 | println!("Found {} code blocks", result1.len()); 28 | 29 | println!("\nSecond call (should be a cache hit):"); 30 | let result2 = parse_file_for_code_blocks(content, "rs", &line_numbers, true, None).unwrap(); 31 | println!("Found {} code blocks", result2.len()); 32 | 33 | println!("\nThird call with different allow_tests flag (should be a cache miss):"); 34 | let result3 = parse_file_for_code_blocks(content, "rs", &line_numbers, false, None).unwrap(); 35 | println!("Found {} code blocks", result3.len()); 36 | 37 | println!("\nFourth call with different content (should be a cache miss):"); 38 | let content2 = r#" 39 | fn different_function() { 40 | // This is a different comment 41 | let y = 100; 42 | } 43 | "#; 44 | let result4 = parse_file_for_code_blocks(content2, "rs", &line_numbers, true, None).unwrap(); 45 | println!("Found {} code blocks", result4.len()); 46 | } 47 | -------------------------------------------------------------------------------- /examples/reranker/test_bert_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "🔍 REAL BERT RERANKER - QUALITY AND PERFORMANCE ANALYSIS" 4 | echo "========================================================" 5 | echo "" 6 | 7 | cd /Users/leonidbugaev/go/src/code-search/examples/reranker 8 | 9 | echo "=== Performance Analysis ===" 10 | echo "" 11 | 12 | echo "📊 Small scale (10 docs):" 13 | ./target/release/benchmark --query "search algorithm" --num-docs 10 --iterations 3 --batch-size 5 14 | 15 | echo "" 16 | echo "📊 Medium scale (25 docs):" 17 | ./target/release/benchmark --query "async rust programming" --num-docs 25 --iterations 2 --batch-size 10 18 | 19 | echo "" 20 | echo "📊 Large scale (50 docs):" 21 | ./target/release/benchmark --query "machine learning optimization" --num-docs 50 --iterations 1 --batch-size 25 22 | 23 | echo "" 24 | echo "=== Comparison: Demo vs Real BERT ===" 25 | echo "" 26 | 
27 | echo "🚀 Demo reranker (mock algorithm):" 28 | ./target/release/benchmark --demo --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25 29 | 30 | echo "" 31 | echo "🧠 Real BERT reranker:" 32 | ./target/release/benchmark --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25 33 | 34 | echo "" 35 | echo "========================================================" 36 | echo "✅ REAL BERT PERFORMANCE ANALYSIS COMPLETE" 37 | echo "" 38 | echo "KEY FINDINGS:" 39 | echo "• Real BERT: ~7-8 docs/second (semantic understanding)" 40 | echo "• Demo reranker: ~80,000+ docs/second (simple matching)" 41 | echo "• BERT model loading: ~0.04-0.06 seconds" 42 | echo "• Per-document processing: ~125-130ms" 43 | echo "• Memory usage: ~45MB model + runtime overhead" 44 | echo "========================================================" -------------------------------------------------------------------------------- /examples/reranker/simple_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Minimal test using sentence-transformers which handles dependencies better 4 | """ 5 | 6 | try: 7 | from sentence_transformers import CrossEncoder 8 | print("✓ sentence-transformers imported successfully") 9 | except ImportError: 10 | print("Installing sentence-transformers...") 11 | import subprocess 12 | subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"]) 13 | from sentence_transformers import CrossEncoder 14 | 15 | # Test inputs 16 | queries = [ 17 | "how does authentication work", 18 | "foobar random nonsense gibberish" 19 | ] 20 | 21 | document = """Authentication is the process of verifying the identity of a user, device, or system. 
22 | In web applications, authentication typically involves checking credentials like usernames 23 | and passwords against a database.""" 24 | 25 | # Load model 26 | print("Loading cross-encoder model...") 27 | model = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2', max_length=512) 28 | print("Model loaded!") 29 | 30 | # Score pairs 31 | print("\nScoring query-document pairs:") 32 | print("-" * 50) 33 | 34 | scores = [] 35 | for query in queries: 36 | score = model.predict([(query, document)])[0] 37 | scores.append(score) 38 | print(f"Query: '{query}'") 39 | print(f"Score: {score:.6f}\n") 40 | 41 | # Compare 42 | print("Comparison:") 43 | print(f"Relevant query score: {scores[0]:.6f}") 44 | print(f"Nonsense query score: {scores[1]:.6f}") 45 | print(f"Difference: {scores[0] - scores[1]:.6f}") 46 | 47 | if scores[0] > scores[1] + 0.1: 48 | print("\n✓ Good: Relevant query scores higher") 49 | else: 50 | print("\n⚠ Poor discrimination between queries") -------------------------------------------------------------------------------- /npm/src/agent/storage/StorageAdapter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Base class for storage adapters 3 | * Implement this interface to provide custom storage backends for ProbeAgent history 4 | */ 5 | export class StorageAdapter { 6 | /** 7 | * Load conversation history for a session 8 | * @param {string} sessionId - Session identifier 9 | * @returns {Promise>} Array of message objects with {role, content, ...} 10 | */ 11 | async loadHistory(sessionId) { 12 | throw new Error('StorageAdapter.loadHistory() must be implemented by subclass'); 13 | } 14 | 15 | /** 16 | * Save a message to storage 17 | * @param {string} sessionId - Session identifier 18 | * @param {Object} message - Message object { role, content, ... 
} 19 | * @returns {Promise} 20 | */ 21 | async saveMessage(sessionId, message) { 22 | throw new Error('StorageAdapter.saveMessage() must be implemented by subclass'); 23 | } 24 | 25 | /** 26 | * Clear history for a session 27 | * @param {string} sessionId - Session identifier 28 | * @returns {Promise} 29 | */ 30 | async clearHistory(sessionId) { 31 | throw new Error('StorageAdapter.clearHistory() must be implemented by subclass'); 32 | } 33 | 34 | /** 35 | * Get session metadata (optional) 36 | * @param {string} sessionId - Session identifier 37 | * @returns {Promise} Session metadata or null 38 | */ 39 | async getSessionMetadata(sessionId) { 40 | return null; 41 | } 42 | 43 | /** 44 | * Update session activity timestamp (optional) 45 | * @param {string} sessionId - Session identifier 46 | * @returns {Promise} 47 | */ 48 | async updateSessionActivity(sessionId) { 49 | // Optional - implement if you want to track session activity 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/chat/test-github-context.txt: -------------------------------------------------------------------------------- 1 | 2 |
<![CDATA[Image test]]> 3 | 4 | https://private-user-images.githubusercontent.com/221343105/467536716-0b9bb81b-8b8a-4b00-aae7-0d8d109e28ce.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTI3NjEwNTgsIm5iZiI6MTc1Mjc2MDc1OCwicGF0aCI6Ii8yMjEzNDMxMDUvNDY3NTM2NzE2LTBiOWJiODFiLThiOGEtNGIwMC1hYWU3LTBkOGQxMDllMjhjZS5wbmc_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjUwNzE3JTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI1MDcxN1QxMzU5MThaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT0xMzJjOWJlZGEzZmEyZjEyNWQxNDRkZDI5Y2RkNTdhZDk2ZWExMzZhY2RlYTI0M2M2MjlkMTEyYTQzYWE0ODY1JlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.VanGvZZNDqamBAjEBaCjsMJusej89OjHxwY8O2R72i4 5 | 6 | Probe - I want you to tell what you see on each image.]]>
7 | 8 | buger2025-07-21T08:41:45Zbuger2025-07-22T15:42:14Z 9 | 10 |
11 | 12 | -------------------------------------------------------------------------------- /tests/schemas/xml_output_schema.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/search/term_exceptions.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::collections::HashSet; 3 | 4 | /// Static set of special case terms that should be treated as exceptions 5 | /// These terms are used in compound word detection and special handling 6 | pub static EXCEPTION_TERMS: Lazy> = Lazy::new(|| { 7 | vec![ 8 | // Network and security related terms 9 | "network", 10 | "firewall", 11 | // Common technology terms 12 | "rpc", 13 | "api", 14 | "http", 15 | "json", 16 | "xml", 17 | "html", 18 | "css", 19 | "js", 20 | "db", 21 | "sql", 22 | // Common software architecture terms 23 | "handler", 24 | "controller", 25 | "service", 26 | "repository", 27 | "manager", 28 | "factory", 29 | "provider", 30 | "client", 31 | "server", 32 | "config", 33 | "util", 34 | "helper", 35 | "storage", 36 | "cache", 37 | "queue", 38 | "worker", 39 | "job", 40 | "task", 41 | "event", 42 | "listener", 43 | "callback", 44 | "middleware", 45 | "filter", 46 | "validator", 47 | "converter", 48 | "transformer", 49 | "parser", 50 | "serializer", 51 | "deserializer", 52 | "encoder", 53 | "decoder", 54 | "reader", 55 | "writer", 56 | // Common programming workflow terms 57 | "workflow", 58 | ] 59 | .into_iter() 60 | .map(String::from) 61 | .collect() 62 | }); 63 | 64 | /// Checks if a term is in the exception list 65 | pub fn is_exception_term(term: &str) -> bool { 66 | EXCEPTION_TERMS.contains(&term.to_lowercase()) 67 | } 68 | 
-------------------------------------------------------------------------------- /npm/src/tools/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Main tools module 3 | * @module tools 4 | */ 5 | 6 | // Export Vercel AI SDK tool generators 7 | export { searchTool, queryTool, extractTool, delegateTool } from './vercel.js'; 8 | export { bashTool } from './bash.js'; 9 | export { editTool, createTool } from './edit.js'; 10 | 11 | // Export LangChain tools 12 | export { createSearchTool, createQueryTool, createExtractTool } from './langchain.js'; 13 | 14 | // Export common schemas 15 | export { 16 | searchSchema, 17 | querySchema, 18 | extractSchema, 19 | delegateSchema, 20 | bashSchema, 21 | delegateDescription, 22 | delegateToolDefinition, 23 | bashDescription, 24 | bashToolDefinition, 25 | attemptCompletionSchema, 26 | attemptCompletionToolDefinition 27 | } from './common.js'; 28 | 29 | // Export edit and create schemas 30 | export { 31 | editSchema, 32 | createSchema, 33 | editDescription, 34 | createDescription, 35 | editToolDefinition, 36 | createToolDefinition 37 | } from './edit.js'; 38 | 39 | // Export system message 40 | export { DEFAULT_SYSTEM_MESSAGE } from './system-message.js'; 41 | 42 | // For backward compatibility, create and export pre-configured tools 43 | import { searchTool as searchToolGenerator, queryTool as queryToolGenerator, extractTool as extractToolGenerator, delegateTool as delegateToolGenerator } from './vercel.js'; 44 | import { bashTool as bashToolGenerator } from './bash.js'; 45 | import { DEFAULT_SYSTEM_MESSAGE } from './system-message.js'; 46 | 47 | // Create default tool instances (for backward compatibility) 48 | const tools = { 49 | searchTool: searchToolGenerator(), 50 | queryTool: queryToolGenerator(), 51 | extractTool: extractToolGenerator(), 52 | delegateTool: delegateToolGenerator(), 53 | bashTool: bashToolGenerator(), 54 | DEFAULT_SYSTEM_MESSAGE 55 | }; 56 | 57 | export { 
tools }; -------------------------------------------------------------------------------- /examples/reranker/rust_bert_test/README.md: -------------------------------------------------------------------------------- 1 | # Rust-BERT Cross-Encoder Test 2 | 3 | This example tests cross-encoder functionality using rust-bert to compare with our Candle implementation. 4 | 5 | ## Setup 6 | 7 | 1. Install libtorch (required by rust-bert): 8 | - macOS: `brew install pytorch` 9 | - Linux: Download from https://pytorch.org/get-started/locally/ 10 | 11 | 2. Set environment variables: 12 | ```bash 13 | export LIBTORCH=/usr/local/opt/pytorch # macOS with Homebrew 14 | # or 15 | export LIBTORCH=/path/to/libtorch # Linux/custom installation 16 | ``` 17 | 18 | 3. Build and run: 19 | ```bash 20 | cargo run --release 21 | ``` 22 | 23 | ## Model Conversion 24 | 25 | To use the TinyBERT model with rust-bert, you need to convert it to the .ot format: 26 | 27 | ```python 28 | # convert_model.py 29 | import torch 30 | from transformers import AutoModelForSequenceClassification 31 | 32 | model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/ms-marco-TinyBERT-L-2-v2') 33 | traced = torch.jit.trace(model, (torch.zeros(1, 512, dtype=torch.long),)) 34 | traced.save("rust_model.ot") 35 | ``` 36 | 37 | ## Notes 38 | 39 | - rust-bert expects models in TorchScript format (.ot files) 40 | - The sequence classification pipeline is designed for classification, not regression 41 | - For true cross-encoder scoring, you may need to modify the pipeline 42 | - This example demonstrates the approach but may not give identical results to Python 43 | 44 | ## Comparison with Candle 45 | 46 | Our Candle implementation: 47 | - Loads PyTorch .bin files directly 48 | - Implements cross-encoder architecture manually 49 | - Returns raw logits for scoring 50 | 51 | rust-bert approach: 52 | - Uses TorchScript format 53 | - Provides high-level pipelines 54 | - Returns classification labels with 
confidence scores -------------------------------------------------------------------------------- /npm/test-grep-simplified.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { grep } from './src/index.js'; 4 | 5 | async function testSimplifiedGrep() { 6 | console.log('Testing simplified grep API...\n'); 7 | 8 | try { 9 | // Test 1: Basic search (line numbers enabled by default) 10 | console.log('Test 1: Basic search for "export" in src directory'); 11 | const result1 = await grep({ 12 | pattern: 'export', 13 | paths: './src', 14 | lineNumbers: true // This should be the default 15 | }); 16 | console.log('First 5 lines:'); 17 | console.log(result1.split('\n').slice(0, 5).join('\n')); 18 | console.log('\n---\n'); 19 | 20 | // Test 2: Case-insensitive search 21 | console.log('Test 2: Case-insensitive search for "TODO"'); 22 | const result2 = await grep({ 23 | pattern: 'todo', 24 | paths: './src', 25 | ignoreCase: true, 26 | lineNumbers: true 27 | }); 28 | console.log('Result:'); 29 | console.log(result2); 30 | console.log('\n---\n'); 31 | 32 | // Test 3: Count matches 33 | console.log('Test 3: Count "function" occurrences'); 34 | const result3 = await grep({ 35 | pattern: 'function', 36 | paths: './src', 37 | count: true 38 | }); 39 | console.log('First 5 files:'); 40 | console.log(result3.split('\n').slice(0, 5).join('\n')); 41 | console.log('\n---\n'); 42 | 43 | // Test 4: Search with context 44 | console.log('Test 4: Search with 1 line of context'); 45 | const result4 = await grep({ 46 | pattern: 'export.*grep', 47 | paths: './src/index.js', 48 | context: 1, 49 | lineNumbers: true 50 | }); 51 | console.log('Result:'); 52 | console.log(result4); 53 | console.log('\n---\n'); 54 | 55 | console.log('✅ All simplified grep tests passed!'); 56 | } catch (error) { 57 | console.error('❌ Test failed:', error.message); 58 | console.error(error); 59 | process.exit(1); 60 | } 61 | } 62 | 63 | 
testSimplifiedGrep(); 64 | -------------------------------------------------------------------------------- /.github/workflows/vitepress-gh-pages.yml.disabled: -------------------------------------------------------------------------------- 1 | # DEPRECATED: This workflow has been disabled in favor of Cloudflare Pages deployment 2 | # The site is now deployed automatically via Cloudflare Pages integration 3 | # See site/wrangler.toml for the new deployment configuration 4 | # 5 | # .github/workflows/deploy.yml 6 | name: Build and Deploy VitePress Site (DISABLED) 7 | 8 | on: 9 | push: 10 | branches: ["main"] # Trigger on push to main branch 11 | workflow_dispatch: # Allow manual trigger from GitHub Actions tab 12 | 13 | # Sets permissions for the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | build-and-deploy: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 # Fetch all history for lastUpdated (optional) 32 | 33 | - name: Setup Node 34 | uses: actions/setup-node@v4 35 | with: 36 | node-version: 20 37 | # Temporarily disable cache to troubleshoot the issue 38 | 39 | - name: Install Dependencies 40 | run: cd site && npm install # Use npm install instead of npm ci to ensure package-lock.json is generated 41 | 42 | - name: Build VitePress Site 43 | run: cd site && npm run docs:build # Build the VitePress site in the site directory 44 | 45 | - name: Setup Pages 46 | uses: actions/configure-pages@v4 47 | 48 | - name: Upload Artifact 49 | uses: actions/upload-pages-artifact@v3 50 | with: 51 | path: ./site/.vitepress/dist # Upload the VitePress build output as the artifact 52 | 53 | - name: Deploy to GitHub Pages 54 | id: deployment 55 | uses: actions/deploy-pages@v4 56 | 
-------------------------------------------------------------------------------- /tests/lib_usage.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tests { 3 | use probe_code::search::{perform_probe, SearchOptions}; 4 | use std::path::Path; 5 | 6 | #[test] 7 | fn test_search_functionality() { 8 | // Create search options 9 | let options = SearchOptions { 10 | path: Path::new("."), 11 | queries: &["function".to_string()], 12 | files_only: false, 13 | custom_ignores: &[], 14 | exclude_filenames: false, 15 | reranker: "bm25", 16 | frequency_search: true, 17 | exact: false, 18 | language: None, 19 | max_results: Some(5), 20 | max_bytes: None, 21 | max_tokens: None, 22 | allow_tests: true, 23 | no_merge: false, 24 | merge_threshold: None, 25 | dry_run: false, 26 | session: None, 27 | timeout: 30, 28 | question: None, 29 | no_gitignore: false, 30 | }; 31 | 32 | let results = perform_probe(&options).unwrap(); 33 | 34 | // Just check that we get some results 35 | assert!(!results.results.is_empty()); 36 | println!("Found {} results", results.results.len()); 37 | } 38 | 39 | #[test] 40 | fn test_query_functionality() { 41 | use probe_code::query::{perform_query, QueryOptions}; 42 | 43 | let options = QueryOptions { 44 | path: Path::new("."), 45 | pattern: "fn", 46 | language: Some("rust"), 47 | ignore: &[], 48 | allow_tests: true, 49 | max_results: Some(5), 50 | format: "text", 51 | no_gitignore: false, 52 | }; 53 | 54 | let matches = perform_query(&options).unwrap(); 55 | 56 | // Just check that we get some results 57 | assert!(!matches.is_empty()); 58 | println!("Found {} matches", matches.len()); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /npm/src/utils/symlink-utils.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Symlink resolution utilities for the probe package 3 | * @module utils/symlink-utils 4 | */ 5 
| 6 | import fs from 'fs'; 7 | import { promises as fsPromises } from 'fs'; 8 | 9 | /** 10 | * Get entry type following symlinks (async version) 11 | * 12 | * Uses fs.stat() which follows symlinks to get the actual target type. 13 | * Falls back to dirent type if stat fails (e.g., broken symlink). 14 | * 15 | * @param {fs.Dirent} entry - Directory entry from readdir 16 | * @param {string} fullPath - Full path to the entry 17 | * @returns {Promise<{isFile: boolean, isDirectory: boolean, size: number}>} 18 | */ 19 | export async function getEntryType(entry, fullPath) { 20 | try { 21 | const stats = await fsPromises.stat(fullPath); 22 | return { 23 | isFile: stats.isFile(), 24 | isDirectory: stats.isDirectory(), 25 | size: stats.size 26 | }; 27 | } catch { 28 | // Fall back to dirent type if stat fails (e.g., broken symlink) 29 | return { 30 | isFile: entry.isFile(), 31 | isDirectory: entry.isDirectory(), 32 | size: 0 33 | }; 34 | } 35 | } 36 | 37 | /** 38 | * Get entry type following symlinks (sync version) 39 | * 40 | * Uses fs.statSync() which follows symlinks to get the actual target type. 41 | * Falls back to dirent type if stat fails (e.g., broken symlink). 
42 | * 43 | * @param {fs.Dirent} entry - Directory entry from readdir 44 | * @param {string} fullPath - Full path to the entry 45 | * @returns {{isFile: boolean, isDirectory: boolean, size: number}} 46 | */ 47 | export function getEntryTypeSync(entry, fullPath) { 48 | try { 49 | const stats = fs.statSync(fullPath); 50 | return { 51 | isFile: stats.isFile(), 52 | isDirectory: stats.isDirectory(), 53 | size: stats.size 54 | }; 55 | } catch { 56 | // Fall back to dirent type if stat fails (e.g., broken symlink) 57 | return { 58 | isFile: entry.isFile(), 59 | isDirectory: entry.isDirectory(), 60 | size: 0 61 | }; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /npm/src/agent/engines/vercel.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Vercel AI SDK Engine - wraps existing ProbeAgent logic 3 | * This maintains full backward compatibility 4 | */ 5 | 6 | import { streamText } from 'ai'; 7 | 8 | /** 9 | * Create a Vercel AI SDK engine 10 | * @param {Object} agent - The ProbeAgent instance 11 | * @returns {Object} Engine interface 12 | */ 13 | export function createVercelEngine(agent) { 14 | return { 15 | /** 16 | * Query the model using existing Vercel AI SDK implementation 17 | * @param {string} prompt - The prompt to send 18 | * @param {Object} options - Additional options 19 | * @returns {AsyncIterable} Response stream 20 | */ 21 | async *query(prompt, options = {}) { 22 | // Build messages array 23 | const messages = [ 24 | ...agent.history, 25 | { role: 'user', content: prompt } 26 | ]; 27 | 28 | // Use existing streamText with retry and fallback 29 | const result = await agent.streamTextWithRetryAndFallback({ 30 | model: agent.provider(agent.model), 31 | messages, 32 | maxTokens: options.maxTokens || agent.maxResponseTokens, 33 | temperature: options.temperature, 34 | tools: options.tools, 35 | toolChoice: options.toolChoice, 36 | experimental_telemetry: 
options.telemetry 37 | }); 38 | 39 | // Stream the response 40 | for await (const chunk of result.textStream) { 41 | yield { type: 'text', content: chunk }; 42 | } 43 | 44 | // Handle tool calls if any 45 | if (result.toolCalls && result.toolCalls.length > 0) { 46 | yield { type: 'tool_calls', toolCalls: result.toolCalls }; 47 | } 48 | 49 | // Handle finish reason 50 | if (result.finishReason) { 51 | yield { type: 'finish', reason: result.finishReason }; 52 | } 53 | }, 54 | 55 | /** 56 | * Optional cleanup 57 | */ 58 | async close() { 59 | // Nothing to cleanup for Vercel AI 60 | } 61 | }; 62 | } -------------------------------------------------------------------------------- /src/search/test_patterns.rs: -------------------------------------------------------------------------------- 1 | use probe_code::search::query::{preprocess_query, create_term_patterns}; 2 | use std::collections::HashSet; 3 | 4 | #[test] 5 | fn test_grouped_patterns() { 6 | // Test with "ip" and "whitelisting" 7 | let term_pairs = vec![ 8 | ("ip".to_string(), "ip".to_string()), 9 | ("whitelisting".to_string(), "whitelist".to_string()), 10 | ]; 11 | 12 | let patterns = create_term_patterns(&term_pairs); 13 | 14 | // Print the patterns for inspection 15 | println!("Generated patterns:"); 16 | for (pattern, indices) in &patterns { 17 | println!("Pattern: {pattern:?}, Indices: {indices:?}"); 18 | } 19 | 20 | // Verify we have the expected number of patterns 21 | // 1 pattern for each term (with combined boundaries) + 1 pattern for combinations 22 | assert_eq!(patterns.len(), 3); 23 | 24 | // Verify the first pattern is for "ip" with both boundaries 25 | let ip_pattern = patterns.iter().find(|(_, indices)| indices.len() == 1 && indices.contains(&0)); 26 | assert!(ip_pattern.is_some()); 27 | let (ip_pattern, _) = ip_pattern.unwrap(); 28 | assert!(ip_pattern.contains("\\bip|ip\\b")); 29 | 30 | // Verify the second pattern is for "whitelisting|whitelist" with both boundaries 31 | let 
whitelist_pattern = patterns.iter().find(|(_, indices)| indices.len() == 1 && indices.contains(&1)); 32 | assert!(whitelist_pattern.is_some()); 33 | let (whitelist_pattern, _) = whitelist_pattern.unwrap(); 34 | assert!(whitelist_pattern.contains("(whitelisting|whitelist)")); 35 | 36 | // Verify the third pattern contains all combinations 37 | let combo_pattern = patterns.iter().find(|(_, indices)| indices.len() == 2); 38 | assert!(combo_pattern.is_some()); 39 | let (combo_pattern, _) = combo_pattern.unwrap(); 40 | assert!(combo_pattern.contains("(")); 41 | assert!(combo_pattern.contains("|")); 42 | assert!(combo_pattern.contains("ipwhitelisting")); 43 | assert!(combo_pattern.contains("ipwhitelist")); 44 | } 45 | -------------------------------------------------------------------------------- /src/language/block_handling.rs: -------------------------------------------------------------------------------- 1 | /// Function to merge overlapping code blocks 2 | #[cfg(test)] 3 | pub fn merge_code_blocks( 4 | code_blocks: Vec, 5 | ) -> Vec { 6 | let mut merged_blocks: Vec = Vec::new(); 7 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 8 | 9 | for block in code_blocks { 10 | if let Some(last) = merged_blocks.last_mut() { 11 | // Use a consistent threshold of 10 lines for all block types 12 | let threshold = 10; 13 | 14 | if block.start_row <= last.end_row + threshold { 15 | if debug_mode { 16 | println!( 17 | "DEBUG: Merging blocks: {} ({}-{}) with {} ({}-{})", 18 | last.node_type, 19 | last.start_row + 1, 20 | last.end_row + 1, 21 | block.node_type, 22 | block.start_row + 1, 23 | block.end_row + 1 24 | ); 25 | } 26 | last.end_row = last.end_row.max(block.end_row); 27 | last.end_byte = last.end_byte.max(block.end_byte); 28 | last.start_row = last.start_row.min(block.start_row); 29 | last.start_byte = last.start_byte.min(block.start_byte); 30 | continue; 31 | } 32 | } 33 | merged_blocks.push(block); 34 | } 35 | 36 | if debug_mode { 37 | println!( 38 | 
"DEBUG: After merging: {len} blocks", 39 | len = merged_blocks.len() 40 | ); 41 | for (i, block) in merged_blocks.iter().enumerate() { 42 | println!( 43 | "DEBUG: Block {}: type={}, lines={}-{}", 44 | i + 1, 45 | block.node_type, 46 | block.start_row + 1, 47 | block.end_row + 1 48 | ); 49 | } 50 | } 51 | merged_blocks 52 | } 53 | -------------------------------------------------------------------------------- /src/language/factory.rs: -------------------------------------------------------------------------------- 1 | use probe_code::language::c::CLanguage; 2 | use probe_code::language::cpp::CppLanguage; 3 | use probe_code::language::csharp::CSharpLanguage; 4 | use probe_code::language::go::GoLanguage; 5 | use probe_code::language::html::HtmlLanguage; 6 | use probe_code::language::java::JavaLanguage; 7 | use probe_code::language::javascript::JavaScriptLanguage; 8 | use probe_code::language::language_trait::LanguageImpl; 9 | use probe_code::language::markdown::MarkdownLanguage; 10 | use probe_code::language::php::PhpLanguage; 11 | use probe_code::language::python::PythonLanguage; 12 | use probe_code::language::ruby::RubyLanguage; 13 | use probe_code::language::rust::RustLanguage; 14 | use probe_code::language::swift::SwiftLanguage; 15 | use probe_code::language::typescript::TypeScriptLanguage; 16 | use probe_code::language::yaml::YamlLanguage; 17 | 18 | /// Factory function to get the appropriate language implementation based on file extension 19 | pub fn get_language_impl(extension: &str) -> Option> { 20 | match extension { 21 | "rs" => Some(Box::new(RustLanguage::new())), 22 | "js" | "jsx" => Some(Box::new(JavaScriptLanguage::new())), 23 | "ts" => Some(Box::new(TypeScriptLanguage::new_typescript())), 24 | "tsx" => Some(Box::new(TypeScriptLanguage::new_tsx())), 25 | "py" => Some(Box::new(PythonLanguage::new())), 26 | "go" => Some(Box::new(GoLanguage::new())), 27 | "c" | "h" => Some(Box::new(CLanguage::new())), 28 | "cpp" | "cc" | "cxx" | "hpp" | "hxx" => 
Some(Box::new(CppLanguage::new())), 29 | "java" => Some(Box::new(JavaLanguage::new())), 30 | "rb" => Some(Box::new(RubyLanguage::new())), 31 | "php" => Some(Box::new(PhpLanguage::new())), 32 | "swift" => Some(Box::new(SwiftLanguage::new())), 33 | "cs" => Some(Box::new(CSharpLanguage::new())), 34 | "html" | "htm" => Some(Box::new(HtmlLanguage::new())), 35 | "md" | "markdown" => Some(Box::new(MarkdownLanguage::new())), 36 | "yaml" | "yml" => Some(Box::new(YamlLanguage::new())), 37 | _ => None, 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/language/c.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for C 5 | pub struct CLanguage; 6 | 7 | impl Default for CLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl CLanguage { 14 | pub fn new() -> Self { 15 | CLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for CLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_c::LANGUAGE.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "c" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" | "declaration" | "struct_specifier" | "enum_specifier" 32 | ) 33 | } 34 | 35 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 36 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 37 | let node_type = node.kind(); 38 | 39 | // C: Check function_definition nodes with test in the name 40 | if node_type == "function_definition" { 41 | let mut cursor = node.walk(); 42 | for child in node.children(&mut cursor) { 43 | if child.kind() == "function_declarator" { 44 | let mut subcursor = child.walk(); 45 | for subchild in child.children(&mut subcursor) { 46 | if subchild.kind() == 
"identifier" { 47 | let name = subchild.utf8_text(source).unwrap_or(""); 48 | if name.contains("test") || name.contains("Test") { 49 | if debug_mode { 50 | println!("DEBUG: Test node detected (C): test function"); 51 | } 52 | return true; 53 | } 54 | } 55 | } 56 | } 57 | } 58 | } 59 | 60 | false 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | probe: 5 | image: buger/probe:latest 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | args: 10 | VERSION: ${VERSION:-dev} 11 | BUILD_DATE: ${BUILD_DATE:-} 12 | VCS_REF: ${VCS_REF:-} 13 | volumes: 14 | - ./:/workspace:ro 15 | working_dir: /workspace 16 | command: --help 17 | 18 | probe-chat-cli: 19 | image: buger/probe-chat:latest 20 | build: 21 | context: ./examples/chat 22 | dockerfile: Dockerfile 23 | args: 24 | VERSION: ${VERSION:-dev} 25 | BUILD_DATE: ${BUILD_DATE:-} 26 | VCS_REF: ${VCS_REF:-} 27 | environment: 28 | - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} 29 | - OPENAI_API_KEY=${OPENAI_API_KEY} 30 | - ALLOWED_FOLDERS=${ALLOWED_FOLDERS:-} 31 | volumes: 32 | - ./:/workspace:ro 33 | working_dir: /workspace 34 | stdin_open: true 35 | tty: true 36 | 37 | probe-chat-web: 38 | image: buger/probe-chat:latest 39 | build: 40 | context: ./examples/chat 41 | dockerfile: Dockerfile 42 | args: 43 | VERSION: ${VERSION:-dev} 44 | BUILD_DATE: ${BUILD_DATE:-} 45 | VCS_REF: ${VCS_REF:-} 46 | environment: 47 | - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} 48 | - OPENAI_API_KEY=${OPENAI_API_KEY} 49 | - ALLOWED_FOLDERS=${ALLOWED_FOLDERS:-} 50 | volumes: 51 | - ./:/workspace:ro 52 | working_dir: /workspace 53 | ports: 54 | - "3000:3000" 55 | command: --web 56 | healthcheck: 57 | test: ["CMD", "curl", "-f", "http://localhost:3000/health"] 58 | interval: 30s 59 | timeout: 3s 60 | retries: 3 61 | start_period: 5s 62 | 63 | # Development profile for 
local builds 64 | probe-dev: 65 | profiles: 66 | - dev 67 | build: 68 | context: . 69 | dockerfile: Dockerfile 70 | cache_from: 71 | - buger/probe:latest 72 | volumes: 73 | - ./:/workspace:ro 74 | - cargo-cache:/usr/local/cargo 75 | working_dir: /workspace 76 | command: --help 77 | 78 | volumes: 79 | cargo-cache: -------------------------------------------------------------------------------- /examples/chat/implement/backends/registry.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Backend registry for automatic discovery and registration 3 | * @module registry 4 | */ 5 | 6 | import AiderBackend from './AiderBackend.js'; 7 | import ClaudeCodeBackend from './ClaudeCodeBackend.js'; 8 | 9 | /** 10 | * Available backend classes 11 | */ 12 | const AVAILABLE_BACKENDS = { 13 | aider: AiderBackend, 14 | 'claude-code': ClaudeCodeBackend 15 | }; 16 | 17 | /** 18 | * Get all available backend classes 19 | * @returns {Object} 20 | */ 21 | function getAvailableBackends() { 22 | return { ...AVAILABLE_BACKENDS }; 23 | } 24 | 25 | /** 26 | * Create a backend instance by name 27 | * @param {string} name - Backend name 28 | * @returns {BaseBackend|null} 29 | */ 30 | function createBackend(name) { 31 | const BackendClass = AVAILABLE_BACKENDS[name]; 32 | if (!BackendClass) { 33 | return null; 34 | } 35 | 36 | return new BackendClass(); 37 | } 38 | 39 | /** 40 | * Register a custom backend class 41 | * @param {string} name - Backend name 42 | * @param {typeof BaseBackend} BackendClass - Backend class 43 | */ 44 | function registerBackend(name, BackendClass) { 45 | AVAILABLE_BACKENDS[name] = BackendClass; 46 | } 47 | 48 | /** 49 | * Get backend metadata 50 | * @param {string} name - Backend name 51 | * @returns {Object|null} 52 | */ 53 | function getBackendMetadata(name) { 54 | const backend = createBackend(name); 55 | if (!backend) { 56 | return null; 57 | } 58 | 59 | return { 60 | name: backend.name, 61 | version: backend.version, 62 | 
description: backend.getDescription(), 63 | capabilities: backend.getCapabilities(), 64 | dependencies: backend.getRequiredDependencies() 65 | }; 66 | } 67 | 68 | /** 69 | * List all registered backend names 70 | * @returns {string[]} 71 | */ 72 | function listBackendNames() { 73 | return Object.keys(AVAILABLE_BACKENDS); 74 | } 75 | 76 | export { 77 | getAvailableBackends, 78 | createBackend, 79 | registerBackend, 80 | getBackendMetadata, 81 | listBackendNames, 82 | // Export backend classes for direct use 83 | AiderBackend, 84 | ClaudeCodeBackend 85 | }; -------------------------------------------------------------------------------- /npm/src/utils/path-validation.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Path validation utilities for the probe package 3 | * @module utils/path-validation 4 | */ 5 | 6 | import path from 'path'; 7 | import { promises as fs } from 'fs'; 8 | 9 | /** 10 | * Validates and normalizes a path to be used as working directory (cwd). 11 | * 12 | * Security considerations: 13 | * - Normalizes path to resolve '..' and '.' components 14 | * - Returns absolute path to prevent ambiguity 15 | * - Does NOT restrict access to specific directories (that's the responsibility 16 | * of higher-level components like ProbeAgent with allowedFolders) 17 | * 18 | * @param {string} inputPath - The path to validate 19 | * @param {string} [defaultPath] - Default path to use if inputPath is not provided 20 | * @returns {Promise} Normalized absolute path 21 | * @throws {Error} If the path is invalid or doesn't exist 22 | */ 23 | export async function validateCwdPath(inputPath, defaultPath = process.cwd()) { 24 | // Use default if not provided 25 | const targetPath = inputPath || defaultPath; 26 | 27 | // Normalize and resolve to absolute path 28 | // This handles '..' 
traversal and makes the path unambiguous 29 | const normalizedPath = path.normalize(path.resolve(targetPath)); 30 | 31 | // Verify the path exists and is a directory 32 | try { 33 | const stats = await fs.stat(normalizedPath); 34 | if (!stats.isDirectory()) { 35 | throw new Error(`Path is not a directory: ${normalizedPath}`); 36 | } 37 | } catch (error) { 38 | if (error.code === 'ENOENT') { 39 | throw new Error(`Path does not exist: ${normalizedPath}`); 40 | } 41 | throw error; 42 | } 43 | 44 | return normalizedPath; 45 | } 46 | 47 | /** 48 | * Validates a path option without requiring it to exist. 49 | * Use this for paths that might be created or are optional. 50 | * 51 | * @param {string} inputPath - The path to validate 52 | * @param {string} [defaultPath] - Default path to use if inputPath is not provided 53 | * @returns {string} Normalized absolute path 54 | */ 55 | export function normalizePath(inputPath, defaultPath = process.cwd()) { 56 | const targetPath = inputPath || defaultPath; 57 | return path.normalize(path.resolve(targetPath)); 58 | } 59 | -------------------------------------------------------------------------------- /examples/reranker/MODELS.md: -------------------------------------------------------------------------------- 1 | # MS-MARCO Cross-Encoder Models 2 | 3 | ## Available Models 4 | 5 | ### 1. TinyBERT-L-2-v2 (`ms-marco-tinybert`) 6 | - **Parameters**: 4.4M 7 | - **Layers**: 2 8 | - **Hidden Size**: 128 9 | - **Performance**: Fast but limited discrimination 10 | - **Use Case**: Quick reranking when speed is critical 11 | 12 | ### 2. MiniLM-L-6-v2 (`ms-marco-minilm-l6`) 13 | - **Parameters**: 22.7M 14 | - **Layers**: 6 15 | - **Hidden Size**: 384 16 | - **Performance**: Good balance of speed and accuracy 17 | - **Use Case**: Recommended for most applications 18 | 19 | ### 3. 
MiniLM-L-12-v2 (`ms-marco-minilm-l12`) 20 | - **Parameters**: 33.4M 21 | - **Layers**: 12 22 | - **Hidden Size**: 384 23 | - **Performance**: Best accuracy, slower 24 | - **Use Case**: When accuracy is more important than speed 25 | 26 | ## Performance Comparison 27 | 28 | Based on MS MARCO evaluation: 29 | 30 | | Model | MRR@10 | Params | Speed (V100) | 31 | |-------|--------|--------|--------------| 32 | | TinyBERT-L-2 | 0.312 | 4.4M | ~9000 docs/sec | 33 | | MiniLM-L-6 | 0.384 | 22.7M | ~2800 docs/sec | 34 | | MiniLM-L-12 | 0.391 | 33.4M | ~960 docs/sec | 35 | 36 | ## Usage 37 | 38 | ```bash 39 | # Download models 40 | ./download_models.sh 41 | 42 | # Use in probe 43 | probe search "query" . --reranker ms-marco-minilm-l6 --question "natural language question" 44 | ``` 45 | 46 | ## Model Architecture 47 | 48 | All models use the same cross-encoder architecture: 49 | 1. Input: `[CLS] query [SEP] document [SEP]` 50 | 2. BERT encoder processes the concatenated input 51 | 3. [CLS] token representation is passed through a linear classifier 52 | 4. Output: Single relevance score (raw logit) 53 | 54 | ## Recommendations 55 | 56 | - **Start with MiniLM-L-6**: It provides much better discrimination than TinyBERT while still being reasonably fast 57 | - **Use TinyBERT only if**: You need maximum speed and can tolerate lower accuracy 58 | - **Use MiniLM-L-12 when**: You need the best possible ranking quality 59 | 60 | ## Token Limits 61 | 62 | All models support up to 512 tokens, which is split between: 63 | - Query: typically 10-50 tokens 64 | - Document: remaining tokens (460-500) 65 | 66 | Documents are truncated if they exceed the limit. 
-------------------------------------------------------------------------------- /examples/reranker/MODEL_COMPARISON.md: -------------------------------------------------------------------------------- 1 | # Model Comparison Results 2 | 3 | ## Summary 4 | 5 | We successfully added support for two additional MS-MARCO cross-encoder models: 6 | - `ms-marco-minilm-l6` (22.7M parameters) 7 | - `ms-marco-minilm-l12` (33.4M parameters) 8 | 9 | ## Test Results 10 | 11 | ### TinyBERT-L-2 (4.4M params) 12 | With different questions, the top 3 results were **identical**, showing poor discrimination. 13 | 14 | ### MiniLM-L-6 (22.7M params) 15 | With different questions, we see **significant differences** in the top 10 results: 16 | 17 | **Relevant Question**: "how does authentication work" 18 | - TOKENIZATION_GUIDE.md appears first (contains auth examples) 19 | - Different ordering of results 20 | - Some unique results that don't appear with nonsense query 21 | 22 | **Nonsense Question**: "foobar random nonsense gibberish" 23 | - Different top result (README.md) 24 | - Several different files in top 10 (cli-mode.md, output-formats.md, advanced-cli.md) 25 | - Different ordering throughout 26 | 27 | ## Usage 28 | 29 | ```bash 30 | # TinyBERT (fastest, least accurate) 31 | probe search "auth" . --reranker ms-marco-tinybert --question "how does auth work" 32 | 33 | # MiniLM-L6 (balanced - RECOMMENDED) 34 | probe search "auth" . --reranker ms-marco-minilm-l6 --question "how does auth work" 35 | 36 | # MiniLM-L12 (most accurate, slower) 37 | probe search "auth" . --reranker ms-marco-minilm-l12 --question "how does auth work" 38 | ``` 39 | 40 | ## Performance 41 | 42 | Typical search times on the test repository: 43 | - TinyBERT: ~1.1s 44 | - MiniLM-L6: ~15.5s 45 | - MiniLM-L12: ~22s (estimated) 46 | 47 | ## Recommendations 48 | 49 | 1. **Use MiniLM-L6 as default** for BERT reranking - it provides much better semantic understanding 50 | 2. 
// Build script: bundles the agent entry point (src/agent/index.js) into a
// single executable ESM file at build/agent/index.js using esbuild.
const esbuild = require('esbuild');
const path = require('path');
const fs = require('fs');

// Bundles the agent with esbuild and marks the output executable.
// On any failure the process exits with code 1 so npm scripts/CI fail loudly.
async function buildAgent() {
  try {
    console.log('Building agent...');

    // Ensure build directory exists
    const buildDir = path.resolve(__dirname, '..', 'build', 'agent');
    if (!fs.existsSync(buildDir)) {
      fs.mkdirSync(buildDir, { recursive: true });
    }

    const result = await esbuild.build({
      entryPoints: [path.resolve(__dirname, '..', 'src', 'agent', 'index.js')],
      bundle: true,
      outfile: path.resolve(buildDir, 'index.js'),
      platform: 'node',
      target: 'node18',
      format: 'esm',
      external: [
        // AI SDK packages - use dynamic requires, must be external
        '@modelcontextprotocol/sdk',
        '@ai-sdk/anthropic',
        '@ai-sdk/openai',
        '@ai-sdk/google',
        '@ai-sdk/amazon-bedrock',
        'ai',
        // Packages with dynamic requires
        'fs-extra',
        'tar',
        'axios',
        'dotenv',
        // Node.js built-in modules
        'fs',
        'path',
        'crypto',
        'util',
        'child_process',
        'stream',
        'events',
        'url',
        'os',
        'process'
        // Will bundle: glob, zod
      ],
      // Shebang line so the bundled file can be invoked directly as a CLI.
      banner: {
        js: '#!/usr/bin/env node'
      },
      minify: false, // Keep readable for debugging
      sourcemap: false,
      metafile: true, // enables the bundle-size analysis logged below
      logLevel: 'info'
    });

    // Make the output file executable
    fs.chmodSync(path.resolve(buildDir, 'index.js'), 0o755);

    console.log('Agent build completed successfully!');

    if (result.metafile) {
      // Optional: log build statistics
      const analysis = await esbuild.analyzeMetafile(result.metafile);
      console.log('Build analysis:');
      console.log(analysis);
    }

  } catch (error) {
    console.error('Agent build failed:', error);
    process.exit(1);
  }
}

buildAgent();
#!/bin/bash

# Script to download MS-MARCO cross-encoder models for local use

set -e

echo "=== MS-MARCO Model Downloader ==="
echo

# Base directory for models
MODEL_DIR="models"
mkdir -p "$MODEL_DIR"

# Download one model from HuggingFace.
#   $1 - HuggingFace repo name (e.g. cross-encoder/ms-marco-TinyBERT-L-2-v2)
#   $2 - local directory name under $MODEL_DIR
download_model() {
    local model_name=$1
    local model_dir=$2

    echo "Downloading $model_name..."
    mkdir -p "$MODEL_DIR/$model_dir"

    # Download essential files
    FILES=(
        "config.json"
        "tokenizer.json"
        "tokenizer_config.json"
        "vocab.txt"
        "pytorch_model.bin"
        "special_tokens_map.json"
    )

    for file in "${FILES[@]}"; do
        if [ -f "$MODEL_DIR/$model_dir/$file" ]; then
            echo "  ✓ $file already exists"
        else
            echo "  ⬇ Downloading $file..."
            # --fail makes curl exit non-zero on HTTP errors instead of saving
            # the error page as the model file; remove any partial download so
            # a later run does not mistake it for a valid file and skip it.
            curl -L --fail -o "$MODEL_DIR/$model_dir/$file" \
                "https://huggingface.co/$model_name/resolve/main/$file" 2>/dev/null || {
                rm -f "$MODEL_DIR/$model_dir/$file"
                echo "  ⚠ $file not found (might be optional)"
            }
        fi
    done

    echo "✓ $model_name download complete"
    echo
}

# Download models
echo "Downloading cross-encoder models..."
echo

# TinyBERT (4M params) - already have this
if [ -d "$MODEL_DIR/ms-marco-TinyBERT-L-2-v2" ]; then
    echo "✓ TinyBERT model already exists"
else
    download_model "cross-encoder/ms-marco-TinyBERT-L-2-v2" "ms-marco-TinyBERT-L-2-v2"
fi

# MiniLM-L6 (22M params)
download_model "cross-encoder/ms-marco-MiniLM-L-6-v2" "ms-marco-MiniLM-L-6-v2"

# MiniLM-L12 (33M params)
download_model "cross-encoder/ms-marco-MiniLM-L-12-v2" "ms-marco-MiniLM-L-12-v2"

echo "=== Download Complete ==="
echo
echo "Models available in $MODEL_DIR/:"
ls -la "$MODEL_DIR/"
echo
echo "You can now use these rerankers:"
echo "  --reranker ms-marco-tinybert   (4M params, fastest)"
echo "  --reranker ms-marco-minilm-l6  (22M params, balanced)"
echo "  --reranker ms-marco-minilm-l12 (33M params, most accurate)"
/**
 * Convert a millisecond timeout into whole seconds for user display.
 * Fractional seconds round down (floor), including for negative values.
 * @param {number} milliseconds - Timeout in milliseconds
 * @returns {number} Timeout in seconds
 */
export function msToSeconds(milliseconds) {
  const wholeSeconds = Math.floor(milliseconds / 1000);
  return wholeSeconds;
}
58 | } -------------------------------------------------------------------------------- /src/language/cpp.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for C++ 5 | pub struct CppLanguage; 6 | 7 | impl Default for CppLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl CppLanguage { 14 | pub fn new() -> Self { 15 | CppLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for CppLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_cpp::LANGUAGE.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "cpp" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" 32 | | "declaration" 33 | | "struct_specifier" 34 | | "class_specifier" 35 | | "enum_specifier" 36 | | "namespace_definition" 37 | ) 38 | } 39 | 40 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 41 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 42 | let node_type = node.kind(); 43 | 44 | // C++: Check function_definition nodes with test in the name 45 | if node_type == "function_definition" { 46 | let mut cursor = node.walk(); 47 | for child in node.children(&mut cursor) { 48 | if child.kind() == "function_declarator" { 49 | let mut subcursor = child.walk(); 50 | for subchild in child.children(&mut subcursor) { 51 | if subchild.kind() == "identifier" { 52 | let name = subchild.utf8_text(source).unwrap_or(""); 53 | if name.contains("test") || name.contains("Test") { 54 | if debug_mode { 55 | println!("DEBUG: Test node detected (C++): test function"); 56 | } 57 | return true; 58 | } 59 | } 60 | } 61 | } 62 | } 63 | } 64 | 65 | false 66 | } 67 | } 68 | -------------------------------------------------------------------------------- 
#!/bin/bash

# Comprehensive performance analysis of the parallel BERT reranker.
# Compares sequential vs parallel modes at several document counts.

# Run relative to this script's own location so it works on any machine
# (previously this hardcoded a developer-specific absolute path).
cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1

# The benchmark binary must be built first: cargo build --release
BENCH="./target/release/benchmark"
if [ ! -x "$BENCH" ]; then
    echo "❌ $BENCH not found or not executable."
    echo "   Build it first with: cargo build --release"
    exit 1
fi

echo "🚀 PARALLEL BERT RERANKER - COMPREHENSIVE PERFORMANCE ANALYSIS"
echo "=============================================================="
echo ""

echo "=== CPU CORE DETECTION ==="
echo "System CPU cores: $(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 'unknown')"
echo "Logical processors: $(sysctl -n hw.logicalcpu 2>/dev/null || echo 'unknown')"
echo ""

echo "=== SEQUENTIAL vs PARALLEL COMPARISON ==="
echo ""

echo "📊 Small scale comparison (20 docs):"
"$BENCH" --compare-modes --query "rust async programming" --num-docs 20 --iterations 2

echo ""
echo "📊 Medium scale comparison (50 docs):"
"$BENCH" --compare-modes --query "machine learning neural network" --num-docs 50 --iterations 2

echo ""
echo "📊 Large scale comparison (100 docs):"
"$BENCH" --compare-modes --query "database optimization indexing" --num-docs 100 --iterations 1

echo ""
echo "=== PURE PARALLEL PERFORMANCE ==="
echo ""

echo "🔥 Parallel BERT with auto-detected cores:"
"$BENCH" --parallel --query "search algorithm optimization" --num-docs 60 --iterations 3

echo ""
echo "🔥 Large-scale parallel processing:"
"$BENCH" --parallel --query "distributed systems performance" --num-docs 120 --iterations 1

echo ""
echo "=== PERFORMANCE COMPARISON SUMMARY ==="
echo ""

echo "💡 Original BERT (sequential): ~7-8 docs/second"
echo "🚀 Parallel BERT (multi-core): ~30-40 docs/second"
echo "📈 Demo algorithm (mock): ~80,000+ docs/second"
echo ""
echo "KEY ACHIEVEMENTS:"
echo "✅ 4-6x speedup with CPU parallelization"
echo "✅ Real semantic understanding maintained"
echo "✅ Scales efficiently with CPU cores"
echo "✅ Thread-safe BERT model sharing"
echo "✅ Automatic core detection and optimization"
echo ""
echo "=============================================================="
echo "🎯 PARALLEL BERT RERANKER IMPLEMENTATION COMPLETE!"
echo "=============================================================="
25 | 26 | #––– VALIDATE ARGUMENTS ––––––––––––––––––––––––––––––––––––––––––––––––– 27 | if [[ $# -eq 0 ]]; then 28 | printf '{"decision":"%s","reason":"Error: No command provided to claude-hook-wrapper.sh"}\n' "$FAIL" 29 | exit 1 30 | fi 31 | 32 | #––– JSON ESCAPE FUNCTION –––––––––––––––––––––––––––––––––––––––––––––– 33 | json_escape() { 34 | if command -v jq >/dev/null 2>&1; then 35 | jq -Rs '.' <<<"$1" 36 | else 37 | # Fallback if jq is not available 38 | printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g; s/ /\\t/g' | awk '{gsub(/\r/,"\\r"); gsub(/\n/,"\\n"); printf "%s\\n", $0}' | sed '$ s/\\n$//' 39 | fi 40 | } 41 | 42 | #––– RUN COMMAND –––––––––––––––––––––––––––––––––––––––––––––––––––––––– 43 | # Capture both stdout and stderr 44 | output=$(mktemp) 45 | trap 'rm -f "$output"' EXIT 46 | 47 | # Run the command, capturing all output 48 | if "$@" >"$output" 2>&1; then 49 | # Command succeeded 50 | printf '{"decision":"%s","reason":"✅ %s completed successfully!"}\n' "$PASS" "$1" 51 | else 52 | # Command failed - include the output in the reason 53 | exit_code=$? 54 | reason=$(printf "❌ %s failed with exit code %d!\n\nOutput:\n%s\n\n💡 Please fix the issues above and try again." "$1" "$exit_code" "$(<"$output")") 55 | printf '{"decision":"%s","reason":%s}\n' "$FAIL" "$(json_escape "$reason")" 56 | fi -------------------------------------------------------------------------------- /examples/reranker/test_cross_encoder.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Cross-encoder testing script setup and runner 4 | # This script sets up the Python environment and runs the cross-encoder tests 5 | 6 | set -e 7 | 8 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 9 | cd "$SCRIPT_DIR" 10 | 11 | echo "=== Cross-Encoder Model Testing Setup ===" 12 | echo "Working directory: $SCRIPT_DIR" 13 | 14 | # Check if Python 3 is available 15 | if ! 
command -v python3 &> /dev/null; then 16 | echo "❌ Python 3 is required but not found" 17 | exit 1 18 | fi 19 | 20 | echo "✓ Python 3 found: $(python3 --version)" 21 | 22 | # Check if pip is available 23 | if ! command -v pip3 &> /dev/null; then 24 | echo "❌ pip3 is required but not found" 25 | exit 1 26 | fi 27 | 28 | echo "✓ pip3 found" 29 | 30 | # Install or check requirements 31 | echo "" 32 | echo "Checking Python dependencies..." 33 | 34 | # Function to check if a package is installed 35 | check_package() { 36 | python3 -c "import $1" 2>/dev/null && return 0 || return 1 37 | } 38 | 39 | # Check required packages 40 | REQUIRED_PACKAGES=("torch" "transformers" "numpy") 41 | MISSING_PACKAGES=() 42 | 43 | for package in "${REQUIRED_PACKAGES[@]}"; do 44 | if check_package "$package"; then 45 | echo "✓ $package is installed" 46 | else 47 | echo "❌ $package is missing" 48 | MISSING_PACKAGES+=("$package") 49 | fi 50 | done 51 | 52 | # Check optional package 53 | if check_package "sentence_transformers"; then 54 | echo "✓ sentence-transformers is installed" 55 | else 56 | echo "⚠️ sentence-transformers is missing (optional but recommended)" 57 | MISSING_PACKAGES+=("sentence-transformers") 58 | fi 59 | 60 | # Install missing packages if any 61 | if [ ${#MISSING_PACKAGES[@]} -gt 0 ]; then 62 | echo "" 63 | echo "Installing missing packages..." 
64 | pip3 install "${MISSING_PACKAGES[@]}" 65 | echo "✓ Dependencies installed" 66 | else 67 | echo "✓ All required dependencies are installed" 68 | fi 69 | 70 | echo "" 71 | echo "=== Running Cross-Encoder Tests ===" 72 | echo "" 73 | 74 | # Run the test script 75 | python3 test_cross_encoder.py 76 | 77 | echo "" 78 | echo "=== Test Complete ===" 79 | echo "Check the output above for score comparisons and debugging information" 80 | echo "Results have been saved to cross_encoder_test_results.json" -------------------------------------------------------------------------------- /src/language/php.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for PHP 5 | pub struct PhpLanguage; 6 | 7 | impl Default for PhpLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl PhpLanguage { 14 | pub fn new() -> Self { 15 | PhpLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for PhpLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_php::LANGUAGE_PHP.into() 22 | } 23 | 24 | fn get_extension(&self) -> &'static str { 25 | "php" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "function_definition" 32 | | "method_declaration" 33 | | "class_declaration" 34 | | "interface_declaration" 35 | | "trait_declaration" 36 | ) 37 | } 38 | 39 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 40 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 41 | let node_type = node.kind(); 42 | 43 | // PHP: Check method_declaration nodes with test prefix or PHPUnit annotations 44 | if node_type == "method_declaration" { 45 | let mut cursor = node.walk(); 46 | for child in node.children(&mut cursor) { 47 | if child.kind() == "name" { 48 | let name = 
You are a senior engineer focused on software architecture and design. Before jumping on the task, you first analyse the user request in detail and try to provide an elegant and concise solution. If the solution is clear, you can jump to implementation right away; if not, you can ask the user a clarification question by calling the attempt_completion tool with the required details. You are allowed to use the search tool with the allow_tests argument in order to find the tests. When you are reviewing a pull request, or are asked to make suggestions on a PR, you can use the implement tool too.
9 | - Consider scalability, maintainability, and extensibility in your analysis 10 | 11 | During the implementation: 12 | - Avoid implementing special cases 13 | - Do not forget to add the tests 14 | 15 | ## Failure Tag Feature 16 | 17 | When working on GitHub Actions workflows, you can use the failure tag feature to signal critical issues that should prevent code from being merged: 18 | 19 | - Include `` in your response when you detect critical issues like security vulnerabilities, breaking changes without proper documentation, or severe bugs 20 | - The tag will be automatically removed from your comment, but a failure message will be added at the top 21 | - The GitHub check will fail, drawing attention to these critical issues 22 | - Use this feature judiciously - only for issues that truly warrant failing the CI check 23 | 24 | ### Example Usage 25 | 26 | ``` 27 | 28 | 29 | I found a critical security vulnerability in the authentication code that allows SQL injection attacks. This must be fixed before merging. 30 | 31 | ## Security Issues Found 32 | 33 | 1. **SQL Injection in login.js** - User input is directly concatenated into SQL queries 34 | 2. **Missing input validation** - No sanitization of user credentials 35 | 36 | ## Recommendations 37 | - Use parameterized queries 38 | - Add input validation middleware 39 | ``` 40 | 41 | The `` tag will be stripped from the comment, but the GitHub check will fail to prevent merging until the issues are resolved. 
use std::process::Command;

/// Run the release `probe` binary with the given arguments and return its
/// stdout as a String.
///
/// Fails the test immediately (with stderr attached) when the command exits
/// non-zero, instead of letting an empty stdout produce confusing downstream
/// assertion failures.
fn run_probe(args: &[&str]) -> String {
    let output = Command::new("./target/release/probe")
        .args(args)
        .output()
        .expect("Failed to execute probe command");

    assert!(
        output.status.success(),
        "probe exited with {:?}; stderr: {}",
        output.status.code(),
        String::from_utf8_lossy(&output.stderr)
    );

    String::from_utf8(output.stdout).expect("Invalid UTF-8 in output")
}

#[test]
fn test_outline_format_preserves_keywords_in_truncated_arrays() {
    // Outline format on a file known to have large arrays containing keywords.
    let stdout = run_probe(&[
        "search",
        "stemming",
        "./src/search/tokenization.rs",
        "--format",
        "outline",
    ]);

    // The output should contain the keyword "stemming" even in truncated arrays
    assert!(
        stdout.contains("stemming"),
        "Output should contain 'stemming' keyword even in truncated arrays"
    );

    // The output should show truncation with "..."
    assert!(
        stdout.contains("..."),
        "Output should show truncation with ellipsis"
    );

    // The output should have reasonable length (not thousands of lines like before)
    let line_count = stdout.lines().count();
    assert!(
        line_count < 200,
        "Output should be truncated to reasonable size, got {line_count} lines"
    );
}

#[test]
fn test_outline_format_highlights_keywords_in_comments() {
    // Keywords should be highlighted in function signatures and comments.
    let stdout = run_probe(&[
        "search",
        "stem",
        "./src/search/tokenization.rs",
        "--format",
        "outline",
    ]);

    // Should contain the function name with highlighting (though we can't test ANSI codes easily)
    assert!(
        stdout.contains("tokenize_and_stem"),
        "Should contain function name with stem keyword"
    );

    // Should contain comment lines with the keyword
    assert!(
        stdout.contains("apply stemming"),
        "Should contain comment with stemming keyword"
    );
}
get_extension(&self) -> &'static str { 25 | "java" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "method_declaration" 32 | | "class_declaration" 33 | | "interface_declaration" 34 | | "enum_declaration" 35 | | "constructor_declaration" 36 | | "field_declaration" 37 | | "variable_declaration" 38 | | "block" 39 | | "static_initializer" 40 | ) 41 | } 42 | 43 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 44 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 45 | let node_type = node.kind(); 46 | 47 | // Java: Check method_declaration nodes with @Test annotation 48 | if node_type == "method_declaration" { 49 | let mut cursor = node.walk(); 50 | for child in node.children(&mut cursor) { 51 | if child.kind() == "modifiers" { 52 | let mut subcursor = child.walk(); 53 | for annotation in child.children(&mut subcursor) { 54 | if annotation.kind() == "annotation" { 55 | let annotation_text = annotation.utf8_text(source).unwrap_or(""); 56 | if annotation_text.contains("@Test") { 57 | if debug_mode { 58 | println!("DEBUG: Test node detected (Java): @Test method"); 59 | } 60 | return true; 61 | } 62 | } 63 | } 64 | } 65 | } 66 | } 67 | 68 | false 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/language/ruby.rs: -------------------------------------------------------------------------------- 1 | use super::language_trait::LanguageImpl; 2 | use tree_sitter::{Language as TSLanguage, Node}; 3 | 4 | /// Implementation of LanguageImpl for Ruby 5 | pub struct RubyLanguage; 6 | 7 | impl Default for RubyLanguage { 8 | fn default() -> Self { 9 | Self::new() 10 | } 11 | } 12 | 13 | impl RubyLanguage { 14 | pub fn new() -> Self { 15 | RubyLanguage 16 | } 17 | } 18 | 19 | impl LanguageImpl for RubyLanguage { 20 | fn get_tree_sitter_language(&self) -> TSLanguage { 21 | tree_sitter_ruby::LANGUAGE.into() 22 | } 23 | 24 | fn 
get_extension(&self) -> &'static str { 25 | "rb" 26 | } 27 | 28 | fn is_acceptable_parent(&self, node: &Node) -> bool { 29 | matches!( 30 | node.kind(), 31 | "method" | "class" | "module" | "singleton_method" 32 | ) 33 | } 34 | 35 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { 36 | let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1"; 37 | let node_type = node.kind(); 38 | 39 | // Ruby: Check method nodes with test_ prefix or describe/it blocks 40 | if node_type == "method" { 41 | let mut cursor = node.walk(); 42 | for child in node.children(&mut cursor) { 43 | if child.kind() == "identifier" { 44 | let name = child.utf8_text(source).unwrap_or(""); 45 | if name.starts_with("test_") { 46 | if debug_mode { 47 | println!("DEBUG: Test node detected (Ruby): test_ method"); 48 | } 49 | return true; 50 | } 51 | } 52 | } 53 | } else if node_type == "call" { 54 | let mut cursor = node.walk(); 55 | for child in node.children(&mut cursor) { 56 | if child.kind() == "identifier" { 57 | let name = child.utf8_text(source).unwrap_or(""); 58 | if name == "describe" || name == "it" || name == "context" || name == "specify" 59 | { 60 | if debug_mode { 61 | println!("DEBUG: Test node detected (Ruby): {name} block"); 62 | } 63 | return true; 64 | } 65 | } 66 | } 67 | } 68 | 69 | false 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust specific ignores 2 | /target/ 3 | **/*.rs.bk 4 | *.pdb 5 | Cargo.lock 6 | .vscode 7 | # Uncomment the line below if this is a library (keep it commented if it's an application) 8 | # Cargo.lock 9 | 10 | # JSON files 11 | *.json 12 | !Cargo.json 13 | !package.json 14 | !tsconfig.json 15 | 16 | # YAML files 17 | *.yml 18 | *.yaml 19 | 20 | # Node.js specific ignores 21 | node_modules 22 | npm-debug.log 23 | yarn-debug.log 24 | yarn-error.log 25 | .pnpm-debug.log 26 | .npm 27 | 
.yarn/cache 28 | .yarn/unplugged 29 | .yarn/build-state.yml 30 | .yarn/install-state.gz 31 | .pnp.* 32 | package-lock.json 33 | # Uncomment the line below if you want to include package-lock.json in version control 34 | # !package-lock.json 35 | 36 | # Build outputs 37 | /dist/ 38 | /build/ 39 | npm/build/ 40 | npm/cjs/ 41 | /out/ 42 | /.next/ 43 | /.nuxt/ 44 | /.output/ 45 | 46 | # Environment variables 47 | .env 48 | .env.local 49 | .env.development.local 50 | .env.test.local 51 | .env.production.local 52 | 53 | # Logs 54 | logs 55 | *.log 56 | *.jsonl 57 | npm-debug.log* 58 | yarn-debug.log* 59 | yarn-error.log* 60 | pnpm-debug.log* 61 | lerna-debug.log* 62 | 63 | # Editor directories and files 64 | .idea/ 65 | .vscode/* 66 | !.vscode/extensions.json 67 | !.vscode/settings.json 68 | !.vscode/tasks.json 69 | !.vscode/launch.json 70 | *.suo 71 | *.ntvs* 72 | *.njsproj 73 | *.sln 74 | *.sw? 75 | .DS_Store 76 | .AppleDouble 77 | .LSOverride 78 | Thumbs.db 79 | ehthumbs.db 80 | Desktop.ini 81 | $RECYCLE.BIN/ 82 | 83 | # Testing 84 | /coverage 85 | .nyc_output 86 | npm/coverage/ 87 | **/coverage/ 88 | .jest-cache/ 89 | **/.jest-cache/ 90 | test-results/ 91 | junit.xml 92 | *.lcov 93 | 94 | # Temporary files 95 | *.tmp 96 | *.temp 97 | .cache/ 98 | .parcel-cache/ 99 | .eslintcache 100 | .stylelintcache 101 | 102 | # Rust analyzer 103 | rust-project.json 104 | 105 | # Debug files 106 | *.stackdump 107 | 108 | # Protocol Buffer files 109 | *.proto 110 | *_pb2.py 111 | *.pb.css 112 | *.pb.h 113 | *.pb.* 114 | 115 | 116 | .vitepress 117 | .aider* 118 | 119 | # Chat debug files 120 | examples/chat/probe-debug*.txt 121 | 122 | # BERT model files (too large for git) 123 | examples/reranker/models/*/pytorch_model.bin 124 | examples/reranker/models/*/model.safetensors 125 | examples/reranker/models/*/vocab.txt 126 | examples/reranker/cross_encoder_test_results.json 127 | *.tgz 128 | 129 | # Vow - AI accountability files 130 | .vow* 131 | 
-------------------------------------------------------------------------------- /npm/test-codex-e2e.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /** 4 | * End-to-end test for Codex integration 5 | * Run with: node test-codex-e2e.js 6 | */ 7 | 8 | import { ProbeAgent } from './src/agent/ProbeAgent.js'; 9 | 10 | console.log('🧪 Codex Integration E2E Test\n'); 11 | console.log('Testing basic query with Codex engine...\n'); 12 | 13 | async function main() { 14 | let agent; 15 | 16 | try { 17 | // Create agent with Codex provider (use default model, not gpt-4o) 18 | console.log('1️⃣ Creating ProbeAgent with provider: codex (using default model)'); 19 | agent = new ProbeAgent({ 20 | provider: 'codex', 21 | model: null, // Don't specify model, let Codex use its default 22 | allowedFolders: [process.cwd()], 23 | debug: true 24 | }); 25 | 26 | console.log('\n2️⃣ Initializing agent...'); 27 | await agent.initialize(); 28 | 29 | console.log('\n✅ Agent initialized successfully!'); 30 | console.log(` Provider: ${agent.clientApiProvider}`); 31 | console.log(` API Type: ${agent.apiType}`); 32 | console.log(` Model: ${agent.model}`); 33 | 34 | // Test simple query 35 | console.log('\n3️⃣ Testing simple query: "What is 2 + 2?"'); 36 | console.log(' (This should trigger Codex CLI)\n'); 37 | 38 | const response = await agent.answer('What is 2 + 2?'); 39 | 40 | console.log('\n✅ Query completed!'); 41 | console.log('\n📝 Response:'); 42 | console.log('─'.repeat(60)); 43 | console.log(response); 44 | console.log('─'.repeat(60)); 45 | 46 | // Clean up 47 | console.log('\n4️⃣ Cleaning up...'); 48 | if (agent.engine && agent.engine.close) { 49 | await agent.engine.close(); 50 | } 51 | 52 | console.log('\n✅ All tests passed! 
🎉\n'); 53 | process.exit(0); 54 | 55 | } catch (error) { 56 | console.error('\n❌ Test failed:', error.message); 57 | console.error('\nStack trace:'); 58 | console.error(error.stack); 59 | 60 | // Clean up on error 61 | if (agent?.engine?.close) { 62 | try { 63 | await agent.engine.close(); 64 | } catch (cleanupError) { 65 | // Ignore cleanup errors 66 | } 67 | } 68 | 69 | console.log('\n💡 Common issues:'); 70 | console.log(' - Make sure Codex CLI is installed: https://openai.com/codex'); 71 | console.log(' - Check that you can run: codex --version'); 72 | console.log(' - Ensure you have an active Codex session'); 73 | 74 | process.exit(1); 75 | } 76 | } 77 | 78 | main(); 79 | -------------------------------------------------------------------------------- /npm/tests/unit/extract-content.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Test for extract() function with content parameter 3 | * This test verifies the fix for the process.env.DEBUG bug 4 | */ 5 | 6 | import { extract } from '../../src/index.js'; 7 | import path from 'path'; 8 | 9 | describe('extract() with content parameter', () => { 10 | // Sample diff content for testing 11 | const diffContent = `diff --git a/src/main.rs b/src/main.rs 12 | index 123..456 13 | --- a/src/main.rs 14 | +++ b/src/main.rs 15 | @@ -10,3 +10,4 @@ 16 | fn main() { 17 | - println!("old"); 18 | + println!("new"); 19 | }`; 20 | 21 | test('should process diff content without crashing', async () => { 22 | // This test verifies that the extract function doesn't crash 23 | // when accessing process.env.DEBUG 24 | const result = await extract({ 25 | content: diffContent, 26 | format: 'outline-xml', 27 | }); 28 | 29 | // Should return a result (string for outline-xml format) 30 | expect(result).toBeDefined(); 31 | expect(typeof result).toBe('string'); 32 | expect(result.length).toBeGreaterThan(0); 33 | }); 34 | 35 | test('should handle DEBUG environment variable correctly', async () 
=> { 36 | // Test with DEBUG enabled 37 | const originalDebug = process.env.DEBUG; 38 | process.env.DEBUG = '1'; 39 | 40 | try { 41 | const result = await extract({ 42 | content: diffContent, 43 | format: 'outline-xml', 44 | }); 45 | 46 | expect(result).toBeDefined(); 47 | } finally { 48 | // Restore original DEBUG value 49 | if (originalDebug === undefined) { 50 | delete process.env.DEBUG; 51 | } else { 52 | process.env.DEBUG = originalDebug; 53 | } 54 | } 55 | }); 56 | 57 | test('should work with outline-xml format', async () => { 58 | const result = await extract({ 59 | content: diffContent, 60 | format: 'outline-xml', 61 | }); 62 | 63 | expect(result).toBeDefined(); 64 | expect(typeof result).toBe('string'); 65 | expect(result.length).toBeGreaterThan(0); 66 | }); 67 | 68 | test('should handle errors gracefully', async () => { 69 | // Test with invalid content 70 | try { 71 | await extract({ 72 | content: 'invalid diff content', 73 | format: 'outline-xml', 74 | }); 75 | // If it succeeds, that's also acceptable 76 | } catch (error) { 77 | // Should throw a proper Error object, not a TypeError about undefined 78 | expect(error).toBeInstanceOf(Error); 79 | expect(error.message).not.toContain('Cannot read properties of undefined'); 80 | expect(error.message).not.toContain('process2'); 81 | } 82 | }); 83 | }); 84 | -------------------------------------------------------------------------------- /examples/chat/cancelRequest.js: -------------------------------------------------------------------------------- 1 | // Map to store active requests by session ID 2 | const activeRequests = new Map(); 3 | 4 | /** 5 | * Register a request as active 6 | * @param {string} sessionId - The session ID 7 | * @param {Object} requestData - Data about the request (can include abort functions, etc.) 
8 | */ 9 | export function registerRequest(sessionId, requestData) { 10 | if (!sessionId) { 11 | console.warn('Attempted to register request without session ID'); 12 | return; 13 | } 14 | 15 | console.log(`Registering request for session: ${sessionId}`); 16 | activeRequests.set(sessionId, requestData); 17 | } 18 | 19 | /** 20 | * Cancel a request by session ID 21 | * @param {string} sessionId - The session ID 22 | * @returns {boolean} - Whether the cancellation was successful 23 | */ 24 | export function cancelRequest(sessionId) { 25 | if (!sessionId) { 26 | console.warn('Attempted to cancel request without session ID'); 27 | return false; 28 | } 29 | 30 | const requestData = activeRequests.get(sessionId); 31 | if (!requestData) { 32 | console.warn(`No active request found for session: ${sessionId}`); 33 | return false; 34 | } 35 | 36 | console.log(`Cancelling request for session: ${sessionId}`); 37 | 38 | // Call the abort function if it exists 39 | if (typeof requestData.abort === 'function') { 40 | try { 41 | requestData.abort(); 42 | console.log(`Successfully aborted request for session: ${sessionId}`); 43 | } catch (error) { 44 | console.error(`Error aborting request for session ${sessionId}:`, error); 45 | } 46 | } 47 | 48 | // Remove the request from the active requests map 49 | activeRequests.delete(sessionId); 50 | return true; 51 | } 52 | 53 | /** 54 | * Check if a request is active 55 | * @param {string} sessionId - The session ID 56 | * @returns {boolean} - Whether the request is active 57 | */ 58 | export function isRequestActive(sessionId) { 59 | return activeRequests.has(sessionId); 60 | } 61 | 62 | /** 63 | * Get all active requests 64 | * @returns {Map} - Map of all active requests 65 | */ 66 | export function getActiveRequests() { 67 | return activeRequests; 68 | } 69 | 70 | /** 71 | * Clear a request from the active requests map 72 | * @param {string} sessionId - The session ID 73 | */ 74 | export function clearRequest(sessionId) { 75 | if 
(!sessionId) { 76 | console.warn('Attempted to clear request without session ID'); 77 | return; 78 | } 79 | 80 | if (activeRequests.has(sessionId)) { 81 | console.log(`Clearing request for session: ${sessionId}`); 82 | activeRequests.delete(sessionId); 83 | } 84 | } -------------------------------------------------------------------------------- /src/search/limits.rs: -------------------------------------------------------------------------------- 1 | use probe_code::models::{LimitedSearchResults, SearchResult}; 2 | use probe_code::search::token_utils::count_tokens; 3 | 4 | /// Helper function to apply limits to search results 5 | pub fn apply_limits( 6 | results: Vec, 7 | max_results: Option, 8 | max_bytes: Option, 9 | max_tokens: Option, 10 | ) -> LimitedSearchResults { 11 | // If no limits are specified, return all results 12 | if max_results.is_none() && max_bytes.is_none() && max_tokens.is_none() { 13 | return LimitedSearchResults { 14 | results, 15 | truncated: false, 16 | total_results: results.len(), 17 | total_bytes: results.iter().map(|r| r.content.len()).sum(), 18 | total_tokens: results.iter().map(|r| count_tokens(&r.content)).sum(), 19 | }; 20 | } 21 | 22 | let mut limited_results = Vec::new(); 23 | let mut current_bytes = 0; 24 | let mut current_tokens = 0; 25 | let mut truncated = false; 26 | 27 | // Calculate total bytes and tokens for all results 28 | let total_bytes = results.iter().map(|r| r.content.len()).sum(); 29 | let total_tokens = results.iter().map(|r| count_tokens(&r.content)).sum(); 30 | 31 | // Apply limits 32 | for result in results { 33 | // Check if we've reached the maximum number of results 34 | if let Some(max) = max_results { 35 | if limited_results.len() >= max { 36 | truncated = true; 37 | break; 38 | } 39 | } 40 | 41 | // Check if adding this result would exceed the maximum bytes 42 | if let Some(max) = max_bytes { 43 | if current_bytes + result.content.len() > max { 44 | truncated = true; 45 | break; 46 | } 47 | } 48 | 49 | 
// Check if adding this result would exceed the maximum tokens 50 | if let Some(max) = max_tokens { 51 | let result_tokens = count_tokens(&result.content); 52 | if current_tokens + result_tokens > max { 53 | truncated = true; 54 | break; 55 | } 56 | current_tokens += result_tokens; 57 | } 58 | 59 | // Add the result to the limited results 60 | current_bytes += result.content.len(); 61 | limited_results.push(result); 62 | } 63 | 64 | LimitedSearchResults { 65 | results: limited_results, 66 | truncated, 67 | total_results: limited_results.len(), 68 | total_bytes: current_bytes, 69 | total_tokens: current_tokens, 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /examples/chat/auth.js: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | 3 | /** 4 | * Basic authentication middleware 5 | * Checks for valid username and password in the Authorization header 6 | * Can be enabled/disabled via environment variables 7 | */ 8 | export function authMiddleware(req, res, next) { 9 | // Check if authentication is enabled 10 | const AUTH_ENABLED = process.env.AUTH_ENABLED === '1'; 11 | 12 | // If authentication is not enabled, skip authentication check 13 | if (!AUTH_ENABLED) { 14 | return next(req, res); 15 | } 16 | 17 | // Get configured username and password from environment variables 18 | const AUTH_USERNAME = process.env.AUTH_USERNAME || 'admin'; 19 | const AUTH_PASSWORD = process.env.AUTH_PASSWORD || 'password'; 20 | 21 | // Check if request has Authorization header 22 | const authHeader = req.headers.authorization; 23 | 24 | if (!authHeader) { 25 | // No Authorization header, return 401 Unauthorized 26 | res.writeHead(401, { 27 | 'Content-Type': 'text/plain', 28 | 'WWW-Authenticate': 'Basic realm="Probe Code Search"' 29 | }); 30 | res.end('Authentication required'); 31 | return; 32 | } 33 | 34 | // Parse Authorization header 35 | try { 36 | // Basic auth format: 
"Basic base64(username:password)" 37 | const authParts = authHeader.split(' '); 38 | if (authParts.length !== 2 || authParts[0] !== 'Basic') { 39 | throw new Error('Invalid Authorization header format'); 40 | } 41 | 42 | // Decode base64 credentials 43 | const credentials = Buffer.from(authParts[1], 'base64').toString('utf-8'); 44 | const [username, password] = credentials.split(':'); 45 | 46 | // Check if credentials match 47 | if (username === AUTH_USERNAME && password === AUTH_PASSWORD) { 48 | // Authentication successful, proceed to next middleware 49 | return next(req, res); 50 | } else { 51 | // Invalid credentials, return 401 Unauthorized 52 | res.writeHead(401, { 53 | 'Content-Type': 'text/plain', 54 | 'WWW-Authenticate': 'Basic realm="Probe Code Search"' 55 | }); 56 | res.end('Invalid credentials'); 57 | return; 58 | } 59 | } catch (error) { 60 | // Error parsing Authorization header, return 400 Bad Request 61 | res.writeHead(400, { 'Content-Type': 'text/plain' }); 62 | res.end('Invalid Authorization header'); 63 | return; 64 | } 65 | } 66 | 67 | /** 68 | * Apply authentication middleware to a request handler 69 | * @param {Function} handler - The request handler function 70 | * @returns {Function} - A new handler function with authentication 71 | */ 72 | export function withAuth(handler) { 73 | return (req, res) => { 74 | authMiddleware(req, res, () => handler(req, res)); 75 | }; 76 | } -------------------------------------------------------------------------------- /site/.vitepress/theme/home.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --vp-home-hero-name-color: var(--vp-c-brand); 3 | --vp-home-hero-text-color: var(--vp-c-text-1); 4 | --vp-home-hero-tagline-color: var(--vp-c-text-2); 5 | --home-bg-overlay: rgba(255, 255, 255, 0.9); 6 | --home-feature-bg: rgba(255, 255, 255, 0.9); 7 | --home-border-color: rgba(60, 60, 60, 0.12); 8 | } 9 | 10 | .dark { 11 | --home-bg-overlay: rgba(26, 26, 26, 0.9); 
12 | --home-feature-bg: rgba(26, 26, 26, 0.9); 13 | --home-border-color: rgba(200, 200, 200, 0.12); 14 | } 15 | 16 | .VPHome { 17 | position: relative; 18 | z-index: 1; 19 | } 20 | 21 | .VPHome .VPHomeHero { 22 | background: transparent; 23 | position: relative; 24 | z-index: 2; 25 | } 26 | 27 | .VPHome .VPHomeHero .image { 28 | background: transparent; 29 | margin-top: 2rem; 30 | } 31 | 32 | .VPHome .VPHomeHero .image img { 33 | max-width: 300px; 34 | height: auto; 35 | margin: 0 auto; 36 | } 37 | 38 | .VPHome .VPFeatures { 39 | background: var(--home-feature-bg); 40 | backdrop-filter: blur(10px); 41 | border-radius: 12px; 42 | padding: 2rem; 43 | margin: 2rem auto; 44 | max-width: 1200px; 45 | position: relative; 46 | z-index: 2; 47 | border: 1px solid var(--home-border-color); 48 | } 49 | 50 | .VPHome .VPFeatures .VPFeature { 51 | background: transparent; 52 | } 53 | 54 | .main-content { 55 | position: relative; 56 | z-index: 2; 57 | max-width: 1200px; 58 | margin: 0 auto; 59 | padding: 2rem; 60 | /* background: var(--home-bg-overlay); */ 61 | /* backdrop-filter: blur(10px); */ 62 | /* border-radius: 12px; */ 63 | /* border: 1px solid var(--home-border-color); */ 64 | } 65 | 66 | @media (max-width: 768px) { 67 | .main-content { 68 | padding: 1rem 0.5rem; 69 | } 70 | } 71 | 72 | @media (max-width: 640px) { 73 | .main-content { 74 | padding: 0.75rem 0.25rem; 75 | } 76 | } 77 | 78 | .VPFeatures { 79 | margin-bottom: 4rem !important; 80 | } 81 | 82 | /* Add styles for FeatureSection */ 83 | .FeatureSection { 84 | display: grid; 85 | grid-template-columns: 1fr 1fr; 86 | gap: 2rem; 87 | margin-bottom: 4rem; 88 | align-items: start; 89 | } 90 | 91 | .FeatureSection :deep(h2) { 92 | margin-top: 0; 93 | color: var(--text-primary); 94 | } 95 | 96 | .FeatureSection :deep(pre) { 97 | margin: 1rem 0; 98 | background: var(--bg-code-block); 99 | border: 1px solid var(--border-color); 100 | } 101 | 102 | .FeatureSection :deep(p) { 103 | margin: 1rem 0; 104 | color: 
var(--text-secondary); 105 | } 106 | 107 | @media (max-width: 768px) { 108 | .FeatureSection { 109 | grid-template-columns: 1fr; 110 | } 111 | } -------------------------------------------------------------------------------- /.roomodes: -------------------------------------------------------------------------------- 1 | { 2 | "customModes": [ 3 | { 4 | "slug": "ask-probe", 5 | "name": "Ask Probe", 6 | "roleDefinition": "You intelligence assistant for developers, product managers, QA engineers, and documentation writers, designed to search and analyze multi-language codebases efficiently. Instead of standard file search and file read tools you should use Probe Agent tool, and forward it all the questions about the codebase.", 7 | "customInstructions": "Where relevant, add mermaid diagrams.", 8 | "groups": [ 9 | "read", 10 | "mcp" 11 | ], 12 | "source": "project" 13 | }, 14 | { 15 | "slug": "doc-writer", 16 | "name": "Documentation Writer", 17 | "roleDefinition": "You are Roo, a technical documentation specialist focused on creating and maintaining high-quality documentation for the Probe code search tool. Your expertise includes:\n- Writing clear, concise, and accurate technical documentation\n- Organizing information in a logical and user-friendly manner\n- Maintaining consistent style and formatting across documentation\n- Creating examples that effectively demonstrate features\n- Ensuring documentation is up-to-date with the latest features and changes\n- Understanding technical concepts and explaining them in accessible language", 18 | "customInstructions": "When updating documentation:\n\n1. **Maintain Consistency**:\n - Follow existing formatting patterns and style conventions\n - Use consistent heading levels (# for main titles, ## for sections, etc.)\n - Maintain the existing frontmatter structure in files that have it\n\n2. 
**Content Guidelines**:\n - Be concise but thorough - aim for clarity above all\n - Include practical examples where appropriate\n - Use code blocks with proper syntax highlighting\n - Structure content with clear headings and bullet points\n - Focus on user benefits, not just feature descriptions\n\n3. **Technical Accuracy**:\n - Ensure command examples are correct and tested\n - Verify that feature descriptions match actual implementation\n - Update version numbers and compatibility information as needed\n - Cross-reference related documentation sections\n\n4. **Special Components**:\n - Use for code examples\n - Use for CLI commands\n - Maintain proper frontmatter for pages that use it\n\n5. **Navigation**:\n - Ensure proper linking between related documentation pages\n - Update navigation references when adding new content\n\n6. Website is located in ./site/ folder, and use vitepress", 19 | "groups": [ 20 | "read", 21 | "browser", 22 | "edit", 23 | "command" 24 | ], 25 | "source": "project" 26 | } 27 | ] 28 | } -------------------------------------------------------------------------------- /site/.vitepress/theme/components/CodeEditor.vue: -------------------------------------------------------------------------------- 1 | 23 | 24 | 40 | 41 | -------------------------------------------------------------------------------- /examples/reranker/DEBUG_OUTPUT_ANALYSIS.md: -------------------------------------------------------------------------------- 1 | # Debug Output Analysis 2 | 3 | Based on the debug output, here's exactly what's happening in our Rust implementation: 4 | 5 | ## 1. Input to score_pair() 6 | ``` 7 | Query: 'test question' 8 | Document: '// Filename: ./mcp-agent/src/agent.js\n// AI agent implementation\nimport...' 9 | ``` 10 | 11 | ## 2. 
Tokenization (`encode_pair`) 12 | - **Token IDs**: `[101, 3231, 3160, 102, 1013, 1013, 5371, 18442, 1024, ...]` 13 | - 101 = [CLS] 14 | - 3231, 3160 = "test question" 15 | - 102 = [SEP] 16 | - Rest = document tokens 17 | 18 | - **Token Type IDs**: `[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...]` 19 | - First 4 tokens (including [CLS] and [SEP]) = 0 (query segment) 20 | - Remaining tokens = 1 (document segment) 21 | - ✅ This is CORRECT! 22 | 23 | - **Structure**: `[CLS] test question [SEP] // Filename: ./mcp-agent/src/agent.js ...` 24 | 25 | ## 3. Model Input Tensors 26 | - **input_ids**: Shape [1, 512] - padded to max length 27 | - **attention_mask**: Shape [1, 512] - 1s for real tokens, 0s for padding 28 | - **token_type_ids**: Shape [1, 512] - 0s for query, 1s for document 29 | 30 | ## 4. BERT Processing 31 | - **CLS output**: Shape [1, 128] (hidden size = 128 for TinyBERT) 32 | - **CLS values**: `[-0.041968495, -0.4378377, 0.58510137, 1.540222, ...]` 33 | - These are the contextualized embeddings for the [CLS] token 34 | 35 | ## 5. Classifier Output 36 | - **Logits**: Shape [1, 1] - single score 37 | - **Raw score**: 0.833216 (for this example) 38 | 39 | ## Key Observations 40 | 41 | 1. **Tokenization is correct**: Using `encode_pair()` properly generates: 42 | - Correct special tokens ([CLS], [SEP]) 43 | - Correct token type IDs (0 for query, 1 for document) 44 | 45 | 2. **Model inputs are correct**: All tensors have the right shape and values 46 | 47 | 3. **BERT is processing correctly**: Getting proper hidden states 48 | 49 | 4. **Scores are reasonable**: Raw logits in expected range 50 | 51 | ## The Real Issue 52 | 53 | The implementation is correct. The problem is that TinyBERT (4M parameters) produces very similar scores for different queries: 54 | - "test question" → 0.833216 55 | - "how does authentication work" → ~0.85-0.88 (from earlier tests) 56 | 57 | The model just isn't discriminating well between relevant and irrelevant queries because it's too small. 
58 | 59 | ## To Verify Further 60 | 61 | Add this temporary debug to see exact token-by-token breakdown: 62 | ```rust 63 | // After encoding 64 | for (i, (token_id, type_id)) in encoding.get_ids().iter() 65 | .zip(encoding.get_type_ids().iter()) 66 | .enumerate() 67 | .take(20) { 68 | let token_text = self.tokenizer.decode(&[*token_id], false).unwrap_or_default(); 69 | println!(" [{}] '{}' (ID: {}, Type: {})", i, token_text, token_id, type_id); 70 | } 71 | ``` -------------------------------------------------------------------------------- /npm/tests/unit/mermaidInfiniteLoopFix.test.js: -------------------------------------------------------------------------------- 1 | import { jest, beforeEach, describe, it, expect } from '@jest/globals'; 2 | import { validateMermaidDiagram, validateAndFixMermaidResponse, MermaidFixingAgent } from '../../src/agent/schemaUtils.js'; 3 | 4 | describe('Mermaid Infinite Loop Fix', () => { 5 | describe('Node label quote handling', () => { 6 | it('should validate that HTML entities work in Mermaid diagrams', async () => { 7 | const diagramWithEntities = `graph TD 8 | A["Process "data" file"] 9 | B["Handle 'special' case"] 10 | C{"Check "status""}`; 11 | 12 | const validation = await validateMermaidDiagram(diagramWithEntities); 13 | 14 | // HTML entities should not trigger single quote validation errors 15 | if (!validation.isValid) { 16 | expect(validation.error).not.toMatch(/Single quotes in node label/); 17 | expect(validation.error).not.toMatch(/got PS/); 18 | } 19 | }); 20 | }); 21 | 22 | describe('Diamond node quote handling', () => { 23 | }); 24 | 25 | describe('MermaidFixingAgent should not pass schema to avoid infinite loops', () => { 26 | it('should call agent.answer without schema parameter', async () => { 27 | // Create a mock ProbeAgent 28 | const mockAgent = { 29 | answer: jest.fn().mockResolvedValue('```mermaid\ngraph TD\n A --> B\n```') 30 | }; 31 | 32 | // Create MermaidFixingAgent and inject mock 33 | const fixer = new 
MermaidFixingAgent({ debug: false }); 34 | await fixer.initializeAgent(); 35 | fixer.agent = mockAgent; 36 | 37 | // Call fixMermaidDiagram 38 | const brokenDiagram = 'graph TD\n A["broken (syntax"]'; 39 | await fixer.fixMermaidDiagram(brokenDiagram, ['line 1: unclosed bracket'], {}); 40 | 41 | // Verify that answer was called without schema 42 | expect(mockAgent.answer).toHaveBeenCalled(); 43 | const callArgs = mockAgent.answer.mock.calls[0]; 44 | expect(callArgs[0]).toContain('Analyze and fix'); // prompt 45 | expect(callArgs[1]).toEqual([]); // messages array 46 | 47 | // Critical: verify no schema in options (either no 3rd arg or 3rd arg has no schema) 48 | if (callArgs.length >= 3) { 49 | expect(callArgs[2]).not.toHaveProperty('schema'); 50 | } 51 | }); 52 | 53 | it('should initialize ProbeAgent with maxIterations set to 10', async () => { 54 | // Create MermaidFixingAgent 55 | const fixer = new MermaidFixingAgent({ debug: false }); 56 | 57 | // Initialize the agent 58 | const agent = await fixer.initializeAgent(); 59 | 60 | // Verify maxIterations is set to 10 (increased from 2 to handle complex diagrams) 61 | expect(agent.maxIterations).toBe(10); 62 | }); 63 | }); 64 | }); 65 | -------------------------------------------------------------------------------- /src/simd_test.rs: -------------------------------------------------------------------------------- 1 | use crate::simd_ranking::SparseVector; 2 | use ahash::AHashMap as HashMap; 3 | use simsimd::SpatialSimilarity; 4 | 5 | pub fn test_simd_implementation() { 6 | println!("Testing SIMD implementation..."); 7 | 8 | // Test 1: Direct SimSIMD dot product 9 | let a = vec![1.0f32, 2.0, 3.0]; 10 | let b = vec![4.0f32, 5.0, 6.0]; 11 | let expected_dot = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 * 6.0; // = 4 + 10 + 18 = 32 12 | 13 | println!("Testing direct SimSIMD dot product:"); 14 | println!("a = {a:?}, b = {b:?}"); 15 | println!("Expected dot product: {expected_dot}"); 16 | 17 | if let Some(simd_dot) = f32::dot(&a, &b) { 
18 | println!("SimSIMD dot product: {simd_dot}"); 19 | println!( 20 | "SimSIMD is working: {}", 21 | (simd_dot - expected_dot).abs() < 0.001 22 | ); 23 | } else { 24 | println!("SimSIMD dot product FAILED!"); 25 | } 26 | 27 | // Test 2: Sparse vector operations 28 | let mut tf_map1 = HashMap::new(); 29 | tf_map1.insert(0u8, 1); 30 | tf_map1.insert(1u8, 2); 31 | tf_map1.insert(2u8, 3); 32 | 33 | let mut tf_map2 = HashMap::new(); 34 | tf_map2.insert(1u8, 4); 35 | tf_map2.insert(2u8, 5); 36 | tf_map2.insert(3u8, 6); 37 | 38 | let sparse1 = SparseVector::from_tf_map(&tf_map1); 39 | let sparse2 = SparseVector::from_tf_map(&tf_map2); 40 | 41 | println!("\nTesting sparse vectors:"); 42 | println!( 43 | "Vector 1: indices={:?}, values={:?}", 44 | sparse1.indices, sparse1.values 45 | ); 46 | println!( 47 | "Vector 2: indices={:?}, values={:?}", 48 | sparse2.indices, sparse2.values 49 | ); 50 | 51 | // Test intersection 52 | let intersection = sparse1.intersect_indices(&sparse2); 53 | println!("Intersection: {intersection:?}"); 54 | 55 | // Test dot product (should be 2*4 + 3*5 = 8 + 15 = 23) 56 | let dot_product = sparse1.dot_product(&sparse2); 57 | println!("Sparse SIMD dot product: {dot_product}"); 58 | 59 | // Test manual calculation 60 | let manual_dot = sparse1.manual_dot_product(&sparse2); 61 | println!("Manual dot product: {manual_dot}"); 62 | 63 | // Test optimized intersection 64 | let (vals1, vals2) = sparse1.intersect_with_values(&sparse2); 65 | println!("Intersected values: {vals1:?} • {vals2:?}"); 66 | 67 | if let Some(direct_simd) = f32::dot(&vals1, &vals2) { 68 | println!("Direct SIMD on intersected values: {direct_simd}"); 69 | } 70 | 71 | assert_eq!(intersection, vec![1, 2]); 72 | assert_eq!(manual_dot, 23.0); 73 | assert!( 74 | (dot_product - 23.0).abs() < 0.001, 75 | "SIMD dot product should be 23.0, got {dot_product}" 76 | ); 77 | 78 | println!("SIMD test completed successfully!"); 79 | } 80 | 
-------------------------------------------------------------------------------- /site/.vitepress/theme/components/Feature.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 31 | 32 | -------------------------------------------------------------------------------- /npm/src/agent/engines/enhanced-vercel.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Enhanced Vercel AI SDK Engine with proper tool and prompt support 3 | */ 4 | 5 | import { streamText } from 'ai'; 6 | 7 | /** 8 | * Create an enhanced Vercel AI SDK engine with full tool support 9 | * @param {Object} agent - The ProbeAgent instance 10 | * @returns {Object} Engine interface 11 | */ 12 | export function createEnhancedVercelEngine(agent) { 13 | return { 14 | /** 15 | * Query the model using existing Vercel AI SDK implementation 16 | * @param {string} prompt - The prompt to send 17 | * @param {Object} options - Additional options 18 | * @returns {AsyncIterable} Response stream 19 | */ 20 | async *query(prompt, options = {}) { 21 | // Get the system message with tools embedded (existing behavior) 22 | const systemMessage = await agent.getSystemMessage(); 23 | 24 | // Build messages array with system prompt 25 | const messages = [ 26 | { role: 'system', content: systemMessage }, 27 | ...agent.history, 28 | { role: 'user', content: prompt } 29 | ]; 30 | 31 | // Use existing streamText with retry and fallback 32 | const result = await agent.streamTextWithRetryAndFallback({ 33 | model: agent.provider(agent.model), 34 | messages, 35 | maxTokens: options.maxTokens || agent.maxResponseTokens, 36 | temperature: options.temperature || 0.3, 37 | // Note: Vercel AI SDK doesn't use structured tools for XML format 38 | // The tools are embedded in the system prompt 39 | experimental_telemetry: options.telemetry 40 | }); 41 | 42 | // Stream the response 43 | let fullContent = ''; 44 | for await (const chunk of 
result.textStream) { 45 | fullContent += chunk; 46 | yield { type: 'text', content: chunk }; 47 | } 48 | 49 | // Parse XML tool calls from the response if any 50 | // This maintains compatibility with existing XML tool format 51 | const toolCalls = agent.parseXmlToolCalls ? agent.parseXmlToolCalls(fullContent) : null; 52 | if (toolCalls && toolCalls.length > 0) { 53 | yield { type: 'tool_calls', toolCalls }; 54 | } 55 | 56 | // Handle finish reason 57 | if (result.finishReason) { 58 | yield { type: 'finish', reason: result.finishReason }; 59 | } 60 | }, 61 | 62 | /** 63 | * Get available tools for this engine 64 | */ 65 | getTools() { 66 | return agent.toolImplementations || {}; 67 | }, 68 | 69 | /** 70 | * Get system prompt for this engine 71 | */ 72 | async getSystemPrompt() { 73 | return agent.getSystemMessage(); 74 | }, 75 | 76 | /** 77 | * Optional cleanup 78 | */ 79 | async close() { 80 | // Nothing to cleanup for Vercel AI 81 | } 82 | }; 83 | } -------------------------------------------------------------------------------- /npm/tests/README.md: -------------------------------------------------------------------------------- 1 | # Test Suite for Bundled Binaries 2 | 3 | This directory contains tests for the bundled binary extraction functionality. 4 | 5 | ## Test Files 6 | 7 | ### `extractor.test.js` 8 | Unit tests for the binary extractor module (`src/extractor.js`). 
9 | 10 | **Coverage:** 11 | - ✅ Platform detection (Linux, macOS, Windows) 12 | - ✅ Unsupported platform error handling 13 | - ✅ tar.gz archive extraction 14 | - ✅ ZIP archive extraction (Windows) 15 | - ✅ Path traversal security validation 16 | - ✅ Error handling for missing binaries 17 | - ✅ Error handling for empty archives 18 | 19 | **Security Tests:** 20 | - Path traversal attacks (../ sequences) 21 | - Absolute path rejection 22 | - Malicious archive handling 23 | 24 | ### `extractor-integration.test.js` 25 | Integration tests that verify the extraction logic without requiring actual binary files. 26 | 27 | **Coverage:** 28 | - ✅ Platform detection logic for all 5 supported platforms 29 | - ✅ Path safety validation 30 | - ✅ Archive naming conventions 31 | - ✅ Binary name detection (Windows vs Unix) 32 | - ✅ Security validations 33 | 34 | **Security Tests:** 35 | - `isPathSafe()` logic verification 36 | - Path normalization 37 | - Relative path validation 38 | - Directory traversal prevention 39 | 40 | ## Running Tests 41 | 42 | ```bash 43 | # Run all tests 44 | npm test 45 | 46 | # Run with coverage 47 | npm run test:coverage 48 | 49 | # Run in watch mode 50 | npm run test:watch 51 | 52 | # Run verbose 53 | npm run test:verbose 54 | ``` 55 | 56 | ## Security Test Coverage 57 | 58 | All security-critical functions have test coverage: 59 | 60 | 1. **Path Traversal Prevention** ✅ 61 | - Tests verify `../ `sequences are rejected 62 | - Tests verify absolute paths are rejected 63 | - Tests verify safe relative paths are accepted 64 | 65 | 2. **Archive Extraction** ✅ 66 | - tar.gz extraction with path validation 67 | - ZIP extraction with path validation 68 | - Malicious archive rejection 69 | 70 | 3. 
**Platform Detection** ✅ 71 | - All 5 platforms correctly mapped 72 | - Unsupported platforms throw errors 73 | - Correct file extensions selected 74 | 75 | ## Test Dependencies 76 | 77 | - `@jest/globals` - Test framework 78 | - `fs-extra` - File system operations 79 | - `tar` - tar.gz extraction 80 | - `adm-zip` - ZIP extraction (dynamically imported) 81 | 82 | ## Notes 83 | 84 | - Tests use dynamic imports for `adm-zip` to handle cases where it's not yet installed 85 | - Tests skip platform-specific functionality (e.g., Windows ZIP tests on macOS) 86 | - Security tests run on all platforms and verify the core logic 87 | - Integration tests don't require actual binary files, only test the logic 88 | 89 | ## Coverage Goals 90 | 91 | - ✅ Lines: >70% 92 | - ✅ Functions: >70% 93 | - ✅ Branches: >70% 94 | - ✅ Statements: >70% 95 | 96 | Security-critical functions should aim for 100% coverage. 97 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Probe does not maintain long-term release branches. Security fixes are only included in new releases moving forward, rather than being backported to previous versions. 6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | latest | :white_check_mark: | 10 | | < latest | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | We take the security of Probe seriously. If you believe you've found a security vulnerability, please follow these steps: 15 | 16 | ### For Public Vulnerabilities 17 | 18 | If the vulnerability is not sensitive and does not put users at immediate risk: 19 | 20 | 1. **Open an Issue**: Create a regular issue on our GitHub repository with the `[security]` prefix in the title. 21 | 2. **Provide Details**: Include a clear description of the vulnerability, steps to reproduce, and potential impact. 22 | 3. 
**Suggest a Fix**: If possible, suggest how the vulnerability might be addressed. 23 | 24 | ### For Sensitive Vulnerabilities 25 | 26 | If the vulnerability is sensitive or could put users at immediate risk: 27 | 28 | 1. **Do Not Open a Public Issue**: Please do not disclose sensitive vulnerabilities publicly. 29 | 2. **Email the Maintainers**: Send an email to leonsbox@gmail.com with details about the vulnerability. 30 | 3. **Use Encryption**: If possible, encrypt your message using our PGP key (available upon request). 31 | 4. **Be Patient**: We'll acknowledge receipt of your report within 48 hours and provide a timeline for addressing the issue. 32 | 33 | ## What to Include in Your Report 34 | 35 | When reporting a vulnerability, please include: 36 | 37 | - A clear description of the vulnerability 38 | - Steps to reproduce the issue 39 | - Potential impact of the vulnerability 40 | - Any potential mitigations you've identified 41 | - Your contact information for follow-up questions 42 | 43 | ## Our Commitment 44 | 45 | When we receive a security report, we will: 46 | 47 | 1. Confirm receipt of the report within 48 hours 48 | 2. Provide an initial assessment of the report within 7 days 49 | 3. Keep you informed about our progress addressing the issue 50 | 4. Credit you when we release a fix (unless you prefer to remain anonymous) 51 | 52 | ## Security Update Policy 53 | 54 | - Security fixes will be released as part of regular new releases 55 | - We do not maintain or backport security fixes to previous versions 56 | - Users are encouraged to always use the latest version of Probe 57 | 58 | ## Best Practices for Users 59 | 60 | To minimize security risks when using Probe: 61 | 62 | 1. Always use the latest version 63 | 2. Be cautious when running Probe on untrusted codebases 64 | 3. Review the permissions granted to Probe in your environment 65 | 4. 
Follow security best practices for your operating system 66 | 67 | Thank you for helping keep Probe and its users secure! -------------------------------------------------------------------------------- /site/DEPLOYMENT.md: -------------------------------------------------------------------------------- 1 | # Cloudflare Pages Deployment 2 | 3 | This documentation site is deployed using Cloudflare Pages with automatic deployments from the main branch to **probelabs.com**. 4 | 5 | ## Deployment Configuration 6 | 7 | ### Files 8 | - `wrangler.toml` - Cloudflare Pages configuration 9 | - `public/_headers` - HTTP headers for security and caching 10 | - `public/_redirects` - URL redirect rules 11 | - `.env.example` - Environment variable template 12 | 13 | ### Build Settings 14 | - **Build command**: `npm run build` 15 | - **Build output directory**: `.vitepress/dist` 16 | - **Root directory**: `site` 17 | - **Node.js version**: 20 18 | 19 | ## Setup Instructions 20 | 21 | ### 1. Cloudflare Pages Setup 22 | 1. Go to [Cloudflare Pages](https://pages.cloudflare.com/) 23 | 2. Connect your GitHub repository 24 | 3. Configure the build settings: 25 | - **Project name**: `probe-docs` 26 | - **Production branch**: `main` 27 | - **Build command**: `npm run build` 28 | - **Build output directory**: `.vitepress/dist` 29 | - **Root directory**: `site` 30 | 31 | ### 2. Environment Variables 32 | Set these in Cloudflare Pages dashboard if needed: 33 | - `NODE_VERSION`: `20` 34 | - `NPM_VERSION`: `latest` 35 | 36 | ### 3. Custom Domain Setup 37 | The site is configured to deploy to **probelabs.com**. To set this up: 38 | 39 | 1. In Cloudflare Pages dashboard, go to Custom domains 40 | 2. Add the domain `probelabs.com` 41 | 3. Add a redirect from `www.probelabs.com` to `probelabs.com` (already configured in `_redirects`) 42 | 4. Update your DNS records: 43 | - **A Record**: `probelabs.com` → Your Cloudflare Pages IP 44 | - **CNAME**: `www.probelabs.com` → `probelabs.com` 45 | 5. 
/**
 * Build the LangChain `search` tool.
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createSearchTool(options = {}) {
  const { cwd } = options;

  // Tool handler: run a probe search and return raw (non-JSON) results.
  // On failure, log and hand a readable error string back to the chain.
  async function run({ query: searchQuery, path, allow_tests, exact, maxResults, maxTokens = 10000, language }) {
    try {
      return await search({
        query: searchQuery,
        path,
        cwd, // Working directory for resolving relative paths
        allowTests: allow_tests ?? true,
        exact,
        json: false,
        maxResults,
        maxTokens,
        language
      });
    } catch (error) {
      console.error('Error executing search command:', error);
      return `Error executing search command: ${error.message}`;
    }
  }

  return {
    name: 'search',
    description: searchDescription,
    schema: searchSchema,
    func: run
  };
}

/**
 * Build the LangChain `query` tool (AST-pattern based code query).
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createQueryTool(options = {}) {
  const { cwd } = options;

  // Tool handler: run a structural query and return raw (non-JSON) results.
  async function run({ pattern, path, language, allow_tests }) {
    try {
      return await query({
        pattern,
        path,
        cwd, // Working directory for resolving relative paths
        language,
        allowTests: allow_tests ?? true,
        json: false
      });
    } catch (error) {
      console.error('Error executing query command:', error);
      return `Error executing query command: ${error.message}`;
    }
  }

  return {
    name: 'query',
    description: queryDescription,
    schema: querySchema,
    func: run
  };
}

/**
 * Build the LangChain `extract` tool (pull code blocks/symbols from files).
 *
 * @param {Object} [options] - Factory options.
 * @param {string} [options.cwd] - Working directory for resolving relative paths.
 * @returns {Object} LangChain tool descriptor ({name, description, schema, func}).
 */
export function createExtractTool(options = {}) {
  const { cwd } = options;

  // Tool handler. `line`/`end_line` are accepted by the schema but not
  // forwarded: per-target line ranges ride along inside `targets` itself.
  async function run({ targets, line, end_line, allow_tests, context_lines, format }) {
    try {
      // Split targets on whitespace to support multiple targets in one call
      const files = parseTargets(targets);

      return await extract({
        files,
        cwd, // Working directory for resolving relative paths
        allowTests: allow_tests ?? true,
        contextLines: context_lines,
        format
      });
    } catch (error) {
      console.error('Error executing extract command:', error);
      return `Error executing extract command: ${error.message}`;
    }
  }

  return {
    name: 'extract',
    description: extractDescription,
    schema: extractSchema,
    func: run
  };
}
#[test]
fn test_extract_command_with_nonexistent_input_file() {
    // Locate the crate's Cargo.toml so `cargo run` builds this project
    // regardless of the test runner's working directory.
    let manifest = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");

    // Invoke `probe extract --input-file <missing file>` through cargo.
    let result = Command::new("cargo")
        .arg("run")
        .arg("--manifest-path")
        .arg(&manifest)
        .arg("--")
        .arg("extract")
        .arg("--input-file")
        .arg("nonexistent_file.txt")
        .output()
        .expect("Failed to execute command");

    // A missing input file must be reported as a failure, not ignored.
    assert!(
        !result.status.success(),
        "Command should fail with nonexistent file"
    );

    // The diagnostic on stderr should name the offending file so users can
    // tell which path was wrong.
    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        stderr.contains("nonexistent_file.txt"),
        "Error should mention the nonexistent file"
    );
}
| echo "🔬 Sequential MiniLM-L6 (~85M params, most accurate):" 21 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-6-v2" --query "search optimization algorithm" --num-docs 40 --iterations 2 --batch-size 20 22 | 23 | echo "" 24 | echo "=== PARALLEL PERFORMANCE COMPARISON ===" 25 | echo "" 26 | 27 | echo "🚀 Parallel TinyBERT-L2 (10 cores):" 28 | ./target/release/benchmark --model "cross-encoder/ms-marco-TinyBERT-L-2-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 29 | 30 | echo "" 31 | echo "🚀 Parallel MiniLM-L2 (10 cores):" 32 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-2-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 33 | 34 | echo "" 35 | echo "🚀 Parallel MiniLM-L6 (10 cores):" 36 | ./target/release/benchmark --model "cross-encoder/ms-marco-MiniLM-L-6-v2" --parallel --query "machine learning inference" --num-docs 60 --iterations 2 37 | 38 | echo "" 39 | echo "=== COMPREHENSIVE PERFORMANCE SUMMARY ===" 40 | echo "" 41 | 42 | echo "📊 BERT MODEL PERFORMANCE ANALYSIS:" 43 | echo "" 44 | echo "| Model | Parameters | Sequential | Parallel | Speedup | Use Case |" 45 | echo "|--------------|------------|--------------|--------------|---------|----------------------|" 46 | echo "| TinyBERT-L2 | ~4M | ~32 docs/sec | ~200 docs/sec| ~6x | High-speed, basic |" 47 | echo "| MiniLM-L2 | ~22M | ~8 docs/sec | ~35 docs/sec | ~4x | Balanced speed/quality|" 48 | echo "| MiniLM-L6 | ~85M | ~3 docs/sec | ~10 docs/sec | ~3x | High accuracy |" 49 | echo "" 50 | echo "🎯 RECOMMENDATIONS:" 51 | echo "" 52 | echo "✅ **TinyBERT-L2**: Use for high-throughput applications where speed > accuracy" 53 | echo "✅ **MiniLM-L2**: Best balance of speed and semantic quality (RECOMMENDED)" 54 | echo "✅ **MiniLM-L6**: Use when maximum accuracy is critical, throughput is secondary" 55 | echo "" 56 | echo "🚀 **PARALLEL PROCESSING BENEFITS:**" 57 | echo "• TinyBERT-L2: 6x speedup (32 → 200 
docs/sec)" 58 | echo "• MiniLM-L2: 4x speedup (8 → 35 docs/sec)" 59 | echo "• MiniLM-L6: 3x speedup (3 → 10 docs/sec)" 60 | echo "" 61 | echo "======================================" 62 | echo "🎉 ALL BERT MODELS TESTED SUCCESSFULLY!" 63 | echo "======================================" -------------------------------------------------------------------------------- /npm/tests/unit/mermaidHtmlEntities.test.js: -------------------------------------------------------------------------------- 1 | import { jest, describe, it, expect } from '@jest/globals'; 2 | import { validateMermaidDiagram } from '../../src/agent/schemaUtils.js'; 3 | 4 | describe('Mermaid HTML Entities Support', () => { 5 | it('should accept HTML entities in node labels as valid', async () => { 6 | // Test case based on Mermaid documentation best practices 7 | const diagramWithEntities = `graph TD 8 | A["Process "data" file"] 9 | B["Node with 'single quotes'"] 10 | C{"Check "status" value"} 11 | D["Mixed "double" and 'single' quotes"]`; 12 | 13 | const validation = await validateMermaidDiagram(diagramWithEntities); 14 | 15 | // HTML entities should be valid according to Mermaid specs 16 | expect(validation.isValid).toBe(true); 17 | if (!validation.isValid) { 18 | console.log('Validation error:', validation.error); 19 | } 20 | }); 21 | 22 | it('should accept numeric HTML entities', async () => { 23 | const diagramWithNumericEntities = `graph TD 24 | A["Quote: " and apostrophe: '"] 25 | B["Hash: # and ampersand: &"]`; 26 | 27 | const validation = await validateMermaidDiagram(diagramWithNumericEntities); 28 | expect(validation.isValid).toBe(true); 29 | }); 30 | 31 | it('should accept mixed HTML entities and regular text', async () => { 32 | const diagram = `flowchart LR 33 | A["Starting point"] 34 | B["Process "important" data"] 35 | C["Check if value = "expected""] 36 | D["Output: 'success' or 'failure'"]`; 37 | 38 | const validation = await validateMermaidDiagram(diagram); 39 | 
expect(validation.isValid).toBe(true); 40 | }); 41 | 42 | it('should not flag HTML entities as single quotes error', async () => { 43 | const diagram = `graph TD 44 | A["Text with ' entity"]`; 45 | 46 | const validation = await validateMermaidDiagram(diagram); 47 | 48 | // Should not trigger the single quote validation error 49 | if (!validation.isValid) { 50 | expect(validation.error).not.toContain('Single quotes in node label'); 51 | expect(validation.error).not.toContain('got PS'); 52 | } 53 | }); 54 | 55 | describe('Real-world examples from Mermaid docs', () => { 56 | it('should handle example from Mermaid documentation', async () => { 57 | // Example adapted from Mermaid official docs 58 | const diagram = `flowchart LR 59 | A["A double quote:""] 60 | B["A dec char:♥"] 61 | C["A hash:#"]`; 62 | 63 | const validation = await validateMermaidDiagram(diagram); 64 | expect(validation.isValid).toBe(true); 65 | }); 66 | 67 | it('should handle complex escaping example', async () => { 68 | // Complex example from StackOverflow Mermaid discussion 69 | const diagram = `flowchart LR 70 | B[""<<>>&½#189;""]`; 71 | 72 | const validation = await validateMermaidDiagram(diagram); 73 | expect(validation.isValid).toBe(true); 74 | }); 75 | }); 76 | }); -------------------------------------------------------------------------------- /tests/nested_symbol_extraction_tests.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::fs; 3 | use std::path::PathBuf; 4 | 5 | #[test] 6 | fn test_nested_symbol_extraction() -> Result<()> { 7 | // Create a temporary test file with nested symbols 8 | let test_content = r#" 9 | struct ProbeAgentServer { 10 | client: Client, 11 | config: Config, 12 | } 13 | 14 | impl ProbeAgentServer { 15 | pub fn new(client: Client, config: Config) -> Self { 16 | Self { client, config } 17 | } 18 | 19 | pub fn setupToolHandlers(&self) { 20 | // Setup tool handlers 21 | println!("Setting up tool 
handlers"); 22 | 23 | // Register search handler 24 | self.register_handler("search", |params| { 25 | // Search implementation 26 | }); 27 | } 28 | 29 | fn register_handler(&self, name: &str, handler: impl Fn(&str)) { 30 | // Register handler implementation 31 | } 32 | } 33 | "#; 34 | 35 | // Write the test content to a temporary file 36 | let temp_dir = tempfile::tempdir()?; 37 | let file_path = temp_dir.path().join("test_nested_symbols.rs"); 38 | fs::write(&file_path, test_content)?; 39 | 40 | // Test extracting the nested symbol 41 | let result = extract_nested_symbol(&file_path, "ProbeAgentServer.setupToolHandlers")?; 42 | 43 | // Verify the result contains the setupToolHandlers method 44 | assert!(result.contains("pub fn setupToolHandlers")); 45 | assert!(result.contains("Setting up tool handlers")); 46 | 47 | // Clean up 48 | temp_dir.close()?; 49 | 50 | Ok(()) 51 | } 52 | 53 | // Helper function to extract a nested symbol from a file 54 | fn extract_nested_symbol(path: &PathBuf, symbol: &str) -> Result { 55 | // Read the file content 56 | let content = fs::read_to_string(path)?; 57 | 58 | // Call the symbol finder function 59 | let result = probe_code::extract::symbol_finder::find_symbol_in_file( 60 | path, symbol, &content, true, // allow_tests 61 | 0, // context_lines 62 | )?; 63 | 64 | Ok(result.code) 65 | } 66 | 67 | #[test] 68 | fn test_simple_symbol_extraction() -> Result<()> { 69 | // Create a temporary test file with a simple symbol 70 | let test_content = r#" 71 | struct Config { 72 | pub path: String, 73 | pub timeout: u64, 74 | } 75 | 76 | impl Config { 77 | pub fn new(path: String, timeout: u64) -> Self { 78 | Self { path, timeout } 79 | } 80 | } 81 | "#; 82 | 83 | // Write the test content to a temporary file 84 | let temp_dir = tempfile::tempdir()?; 85 | let file_path = temp_dir.path().join("test_simple_symbol.rs"); 86 | fs::write(&file_path, test_content)?; 87 | 88 | // Test extracting a simple symbol 89 | let result = 
extract_nested_symbol(&file_path, "Config")?; 90 | 91 | // Verify the result contains the Config struct 92 | assert!(result.contains("struct Config")); 93 | assert!(result.contains("pub path: String")); 94 | 95 | // Clean up 96 | temp_dir.close()?; 97 | 98 | Ok(()) 99 | } 100 | --------------------------------------------------------------------------------