10 |
11 |
12 |
--------------------------------------------------------------------------------
/src/version.rs:
--------------------------------------------------------------------------------
1 | //! Version utilities for probe
2 | //!
3 | //! This module provides utilities for getting version information at runtime.
4 |
5 | /// Get the version string from Cargo.toml
6 | pub fn get_version() -> &'static str {
7 | env!("CARGO_PKG_VERSION")
8 | }
9 |
10 | /// Get the package name from Cargo.toml
11 | pub fn get_package_name() -> &'static str {
12 | env!("CARGO_PKG_NAME")
13 | }
14 |
15 | /// Get a formatted version string with package name
16 | pub fn get_version_info() -> String {
17 | format!("{} {}", get_package_name(), get_version())
18 | }
19 |
#[cfg(test)]
mod tests {
    use super::{get_package_name, get_version, get_version_info};

    /// Package name the assertions below expect.
    const EXPECTED_NAME: &str = "probe-code";

    /// The version must be non-empty and contain at least one dot.
    #[test]
    fn test_get_version() {
        let reported = get_version();
        assert!(!reported.is_empty());
        // Should follow semantic versioning pattern
        assert!(reported.contains('.'));
    }

    /// The package name must match the expected crate name exactly.
    #[test]
    fn test_get_package_name() {
        assert_eq!(get_package_name(), EXPECTED_NAME);
    }

    /// The combined string must mention the package name and contain a dot.
    #[test]
    fn test_get_version_info() {
        let summary = get_version_info();
        assert!(summary.contains(EXPECTED_NAME));
        assert!(summary.contains('.'));
    }
}
45 |
--------------------------------------------------------------------------------
/tests/test_tokenize.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 | // Import the tokenize function from our probe crate
3 | use probe_code::ranking::tokenize;
4 |
5 | // Test strings
6 | let test_strings = ["The quick brown fox jumps over the lazy dog",
7 | "function calculateTotal(items) { return items.reduce((sum, item) => sum + item.price, 0); }",
8 | "class UserController extends BaseController implements UserInterface",
9 | "Searching for files containing important information",
10 | "Fruitlessly searching for the missing variable in the codebase"];
11 |
12 | println!("Testing tokenization with stop word removal and stemming:\n");
13 |
14 | for (i, test_str) in test_strings.iter().enumerate() {
15 | println!("Original text {}:\n{}", i + 1, test_str);
16 |
17 | // Tokenize with stop word removal and stemming
18 | let tokens = tokenize(test_str);
19 |
20 | println!("Tokens after stop word removal and stemming:");
21 | println!("{tokens:?}");
22 | println!("Number of tokens: {}\n", tokens.len());
23 | }
24 |
25 | // Specific test for stemming
26 | println!("Specific stemming test:");
27 | println!("'fruitlessly' stems to: {}", tokenize("fruitlessly")[0]);
28 | }
29 |
--------------------------------------------------------------------------------
/tests/mocks/test_struct.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
// Data types used by the sample program.
type (
	// Person represents a person with various attributes.
	Person struct {
		Name        string
		Age         int
		Email       string
		PhoneNumber string
		Address     Address
	}

	// Address represents a physical address.
	Address struct {
		Street  string
		City    string
		State   string
		ZipCode string
		Country string
	}
)
22 |
23 | func main() {
24 | // Create a new person
25 | person := Person{
26 | Name: "John Doe",
27 | Age: 30,
28 | Email: "john.doe@example.com",
29 | PhoneNumber: "555-1234",
30 | Address: Address{
31 | Street: "123 Main St",
32 | City: "Anytown",
33 | State: "CA",
34 | ZipCode: "12345",
35 | Country: "USA",
36 | },
37 | }
38 |
39 | // Print the person's information
40 | fmt.Printf("Name: %s\n", person.Name)
41 | fmt.Printf("Age: %d\n", person.Age)
42 | fmt.Printf("Email: %s\n", person.Email)
43 | fmt.Printf("Phone: %s\n", person.PhoneNumber)
44 | fmt.Printf("Address: %s, %s, %s %s, %s\n",
45 | person.Address.Street,
46 | person.Address.City,
47 | person.Address.State,
48 | person.Address.ZipCode,
49 | person.Address.Country)
50 | }
51 |
--------------------------------------------------------------------------------
/npm/tests/nestedQuoteFix.test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Nested Quote Fix Tests
3 | *
4 | * NOTE: Most tests in this file have been skipped for maid 0.0.6 integration.
5 | * These tests check OLD regex-based HTML entity handling and quote fixing behavior:
6 | * - Converting &#39; to '
7 | * - Automatic quote wrapping with escaped inner quotes
8 | * - Specific HTML entity normalization
9 | *
10 | * Maid handles HTML entities and quotes differently using proper parsing.
11 | * Tests marked with .skip check OLD behavior that maid doesn't replicate.
12 | */
13 |
14 | import { validateAndFixMermaidResponse } from '../src/agent/schemaUtils.js';
15 |
16 |
17 |
// Input that already contains HTML entities must come back without a second
// round of encoding (i.e. `&#39;` must not become `&amp;#39;`).
test('should not double-encode already encoded entities', async () => {
  // The node labels carry pre-encoded quote entities on purpose.
  const preEncodedDiagram = `
\`\`\`mermaid
graph TD
A[Text with &#39;single&#39; quotes]
B[Text with &quot;double&quot; quotes]
\`\`\`
`;

  const result = await validateAndFixMermaidResponse(preEncodedDiagram, {
    autoFix: true,
    debug: false
  });

  expect(result.isValid).toBe(true);

  // Should not double-encode: the ampersand of an existing entity must not
  // itself be escaped.
  expect(result.fixedResponse).not.toContain('&amp;#39;');
  expect(result.fixedResponse).not.toContain('&amp;quot;');
});
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/src/language/mod.rs:
--------------------------------------------------------------------------------
1 | // Language module - provides functionality for parsing different programming languages
2 | // using tree-sitter and extracting code blocks.
3 |
4 | // Import submodules
5 | pub mod block_handling;
6 | pub mod common;
7 | pub mod factory;
8 | pub mod language_trait;
9 | pub mod parser;
10 | pub mod parser_pool;
11 | pub mod test_detection;
12 | pub mod tree_cache;
13 |
14 | // Language implementations
15 | pub mod c;
16 | pub mod cpp;
17 | pub mod csharp;
18 | pub mod go;
19 | pub mod html;
20 | pub mod java;
21 | pub mod javascript;
22 | pub mod markdown;
23 | pub mod php;
24 | pub mod python;
25 | pub mod ruby;
26 | pub mod rust;
27 | pub mod swift;
28 | pub mod typescript;
29 | pub mod yaml;
30 |
31 | // Re-export items for backward compatibility
32 | pub use parser::{parse_file_for_code_blocks, parse_file_for_code_blocks_with_tree};
33 | pub use parser_pool::{clear_parser_pool, get_pool_stats, get_pooled_parser, return_pooled_parser};
34 | pub use test_detection::is_test_file;
35 | #[allow(unused_imports)]
36 | pub use tree_cache::{
37 | clear_tree_cache, get_cache_size, get_or_parse_tree_pooled, invalidate_cache_entry,
38 | };
39 |
40 | #[cfg(test)]
41 | mod tests;
42 |
43 | #[cfg(test)]
44 | mod javascript_specific_tests;
45 |
46 | #[cfg(test)]
47 | mod typescript_specific_tests;
48 |
--------------------------------------------------------------------------------
/site/.vitepress/theme/components/CommandExample.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | $
5 |
6 |
7 |
8 |
{{ output }}
9 |
10 |
11 |
12 |
13 |
21 |
22 |
--------------------------------------------------------------------------------
/.claude/commands/performance-review.md:
--------------------------------------------------------------------------------
1 | Your goal is to measure and improve performance.
2 |
3 | First run `cargo build --release` and remember the current performance: DEBUG=1 ./target/release/probe search "yaml workflow agent multi-agent user input" ~/go/src/semantic-kernel/ --max-tokens 10000 2>/dev/null | sed -n '/=== SEARCH TIMING INFORMATION ===/,/====================================/p'
4 |
5 | Print it to the user.
6 |
7 | Now that you have a baseline, find all the steps which take more than 1 second, and run the separate @architecture-agent for each, to plan if we can significantly improve performance. For each suggestion, measure confidence. If confidence is high, add it to the detailed plan; if not, say that it is already performant enough.
8 |
9 | Once you have gone through all the steps and built a solid plan, I want you to start implementing it in a separate agent.
10 | But always explicitly ask the user before each next implementation.
11 |
12 | Each change should be measured and compared with our baseline. You can add more debugging to the search timing information, or make it more detailed if needed.
13 | Once each change is implemented, it should be committed as a separate commit.
14 |
15 | We do care about backward compatibility, and about deterministic outputs as well. Be careful. Validate each change by re-running all the tests.
16 |
--------------------------------------------------------------------------------
/src/search/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod file_processing;
2 | pub mod query;
3 | mod result_ranking;
4 | // Replace the old search_execution with new modules
5 | pub mod block_merging;
6 | pub mod cache; // New module for caching search results
7 | pub mod early_ranker; // New module for early BM25 ranking
8 | pub mod elastic_query;
9 | pub mod file_list_cache; // New module for caching file lists
10 | pub mod filters; // New module for search filters (file:, ext:, type:, etc.)
11 | pub mod ripgrep_searcher;
12 | mod search_limiter;
13 | mod search_options;
14 | pub mod search_output;
15 | pub mod search_runner;
16 | pub mod search_tokens;
17 | pub mod simd_pattern_matching;
18 | pub mod simd_tokenization; // SIMD-accelerated tokenization
19 | pub mod term_exceptions; // New module for term exceptions
20 | pub mod timeout; // New module for timeout functionality
21 | pub mod tokenization; // New elastic search query parser
22 | // Temporarily commented out due to compilation issues
23 | // mod temp_frequency_search;
24 |
25 | #[cfg(test)]
26 | mod file_processing_tests;
27 |
28 | #[cfg(test)]
29 | mod test_token_limiter_failures;
30 |
31 | // Public exports
32 | pub use search_options::SearchOptions;
33 | pub use search_output::format_and_print_search_results;
34 | pub use search_runner::perform_probe;
35 |
--------------------------------------------------------------------------------
/site/.vitepress/theme/components/FeatureList.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
12 |
13 |
14 |
15 |
22 |
23 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use distroless for minimal attack surface and smaller image
2 | FROM gcr.io/distroless/cc-debian12
3 |
4 | # Build arguments for metadata
5 | ARG VERSION=dev
6 | ARG BUILD_DATE
7 | ARG VCS_REF
8 | ARG TARGETARCH
9 |
10 | # Add security and metadata labels
11 | LABEL maintainer="Probe Team" \
12 | description="Probe - Code search tool" \
13 | version="${VERSION}" \
14 | org.opencontainers.image.created="${BUILD_DATE}" \
15 | org.opencontainers.image.source="https://github.com/probelabs/probe" \
16 | org.opencontainers.image.revision="${VCS_REF}" \
17 | org.opencontainers.image.version="${VERSION}" \
18 | org.opencontainers.image.title="Probe" \
19 | org.opencontainers.image.description="AI-friendly code search tool built in Rust"
20 |
21 | # Distroless images run as non-root by default and include CA certificates
22 |
23 | # Copy the pre-built binary based on target architecture
24 | # TARGETARCH is automatically provided by Docker buildx (amd64, arm64)
25 | COPY binaries/${TARGETARCH}/probe /usr/local/bin/probe
26 |
27 | # Health check using the binary (distroless runs as non-root by default)
28 | HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
29 | CMD ["/usr/local/bin/probe", "--version"]
30 |
31 | # Set the default command
32 | ENTRYPOINT ["/usr/local/bin/probe"]
--------------------------------------------------------------------------------
/examples/chat/test-image-spans.js:
--------------------------------------------------------------------------------
1 | import { ProbeChat } from './probeChat.js';
2 |
3 | // Test image extraction with OpenTelemetry spans
/**
 * Sends one chat message containing several image URLs through a ProbeChat
 * instance created in no-API-keys mode and logs a preview of the response.
 * Any failure is caught and logged rather than rethrown.
 */
async function testImageExtraction() {
  console.log('Testing image extraction with OpenTelemetry spans...\n');

  // Test message with images
  const testMessage = `
Here are some images:
- GitHub asset: https://github.com/user-attachments/assets/example.png
- Private image: https://private-user-images.githubusercontent.com/123/example.jpg
- Regular image: https://example.com/photo.jpeg

And some text without images.
`;

  try {
    // Create a ProbeChat instance with no API keys mode
    const chatOptions = { debug: true, noApiKeysMode: true };
    const probeChat = new ProbeChat(chatOptions);

    console.log('🔍 Testing chat with images (no API keys mode)...');
    const result = await probeChat.chat(testMessage);
    console.log('✅ Chat completed successfully');

    const preview = result.response.substring(0, 100) + '...';
    console.log('📄 Response:', preview);

    // Test completed
    console.log('\n🎉 Test completed! Check test-image-spans.jsonl for trace data.');
  } catch (error) {
    console.error('❌ Test failed:', error.message);
  }
}
36 |
37 | testImageExtraction().catch(console.error);
--------------------------------------------------------------------------------
/npm/test-grep.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import { grep } from './src/index.js';
4 |
/**
 * Runs three grep scenarios one after another and prints each result.
 * On the first failure the error is logged and the process exits with status 1.
 */
async function testGrep() {
  console.log('Testing grep functionality...\n');

  // Each scenario is a heading to print plus the options handed to grep().
  const scenarios = [
    {
      // Test 1: Basic search
      heading: 'Test 1: Basic search for "TODO" in src directory',
      options: { pattern: 'TODO', paths: './src', lineNumbers: true }
    },
    {
      // Test 2: Case-insensitive search with count
      heading: 'Test 2: Count "function" occurrences (case-insensitive)',
      options: { pattern: 'function', paths: './src', ignoreCase: true, count: true }
    },
    {
      // Test 3: Files with matches
      heading: 'Test 3: Files containing "export"',
      options: { pattern: 'export', paths: './src', filesWithMatches: true }
    }
  ];

  try {
    for (const { heading, options } of scenarios) {
      console.log(heading);
      const output = await grep(options);
      console.log('Result:');
      console.log(output);
      console.log('\n---\n');
    }

    console.log('✅ All grep tests passed!');
  } catch (error) {
    console.error('❌ Test failed:', error.message);
    console.error(error);
    process.exit(1);
  }
}
50 |
51 | testGrep();
52 |
--------------------------------------------------------------------------------
/examples/chat/npm/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@probelabs/probe-chat",
3 | "version": "1.0.0",
4 | "description": "CLI chat interface for Probe code search",
5 | "main": "index.js",
6 | "type": "module",
7 | "bin": {
8 | "probe-chat": "./bin/probe-chat.js"
9 | },
10 | "scripts": {
11 | "test": "echo \"Error: no test specified\" && exit 1",
12 | "prepublishOnly": "chmod +x ./bin/probe-chat.js"
13 | },
14 | "keywords": [
15 | "probe",
16 | "code-search",
17 | "chat",
18 | "ai",
19 | "cli"
20 | ],
21 | "author": "Leonid Bugaev",
22 | "license": "ISC",
23 | "dependencies": {
24 | "@ai-sdk/anthropic": "^0.0.9",
25 | "@ai-sdk/openai": "^0.0.9",
26 | "@probelabs/probe": "*",
27 | "ai": "^4.1.41",
28 | "chalk": "^5.3.0",
29 | "commander": "^11.1.0",
30 | "dotenv": "^16.3.1",
31 | "inquirer": "^9.2.12",
32 | "ora": "^7.0.1"
33 | },
34 | "engines": {
35 | "node": ">=18.0.0"
36 | },
37 | "repository": {
38 | "type": "git",
39 | "url": "git+https://github.com/probelabs/probe.git"
40 | },
41 | "bugs": {
42 | "url": "https://github.com/probelabs/probe/issues"
43 | },
44 | "homepage": "https://github.com/probelabs/probe#readme",
45 | "publishConfig": {
46 | "access": "public"
47 | },
48 | "files": [
49 | "bin/",
50 | "index.js",
51 | "README.md",
52 | "LICENSE"
53 | ]
54 | }
55 |
--------------------------------------------------------------------------------
/src/language/language_trait.rs:
--------------------------------------------------------------------------------
1 | use tree_sitter::{Language as TSLanguage, Node};
2 |
3 | /// Trait that defines the interface for all language implementations.
4 | pub trait LanguageImpl {
5 | /// Get the tree-sitter language for parsing
6 | fn get_tree_sitter_language(&self) -> TSLanguage;
7 |
8 | /// Check if a node is an acceptable container/parent entity
9 | fn is_acceptable_parent(&self, node: &Node) -> bool;
10 |
11 | /// Check if a node represents a test
12 | fn is_test_node(&self, node: &Node, source: &[u8]) -> bool;
13 |
14 | /// Get the file extension for this language
15 | #[deprecated(since = "0.1.0", note = "this method is not used")]
16 | #[allow(dead_code)]
17 | fn get_extension(&self) -> &'static str;
18 |
19 | /// Find the parent function or method declaration for a node (if any)
20 | fn find_parent_function<'a>(&self, _node: Node<'a>) -> Option> {
21 | // Default implementation returns None
22 | None
23 | }
24 |
25 | /// Extract the symbol signature without implementation body
26 | /// Returns a clean signature for functions, structs, classes, methods, constants, etc.
27 | fn get_symbol_signature(&self, _node: &Node, _source: &[u8]) -> Option {
28 | // Default implementation returns None
29 | // Each language should implement this to extract clean signatures
30 | None
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/examples/chat/test-chat-tracing.js:
--------------------------------------------------------------------------------
1 | import { ProbeChat } from './probeChat.js';
2 |
3 | // Test chat function tracing
/**
 * Sends a single message with an image URL through ProbeChat (debug on,
 * no-API-keys mode), logs the response size and a preview, then waits two
 * seconds before returning. Failures are caught and logged.
 */
async function testChatTracing() {
  console.log('Testing chat tracing...\n');

  // Test message with images
  const testMessage = 'Here is an image: https://github.com/user-attachments/assets/example.png and some text.';
  const flushDelayMs = 2000;

  try {
    // Create a ProbeChat instance with debug enabled
    const probeChat = new ProbeChat({ debug: true, noApiKeysMode: true });

    console.log('🔍 Testing chat function with tracing...');
    console.log('Message:', testMessage);

    // Call the chat function - this should create spans
    const result = await probeChat.chat(testMessage);

    console.log('✅ Chat completed successfully');
    console.log('📄 Response length:', result.response.length);
    const preview = result.response.substring(0, 100) + '...';
    console.log('📄 Response preview:', preview);

    console.log('🎉 Test completed! Check simple-traces.jsonl for trace data.');

    // Wait a bit for telemetry to flush
    console.log('⏳ Waiting for telemetry to flush...');
    await new Promise((resolve) => {
      setTimeout(resolve, flushDelayMs);
    });
  } catch (error) {
    console.error('❌ Test failed:', error.message);
  }
}
37 |
38 | testChatTracing().catch(console.error);
--------------------------------------------------------------------------------
/npm/tests/setup.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Jest setup file
3 | * This file runs before all tests to set up the testing environment
4 | */
5 | import { jest, beforeEach, afterEach } from '@jest/globals';
6 | import fs from 'fs';
7 | import path from 'path';
8 |
9 | // Set environment to test
10 | process.env.NODE_ENV = 'test';
11 |
12 | // Disable debug logging during tests unless explicitly enabled
13 | if (!process.env.TEST_DEBUG) {
14 | process.env.DEBUG = '';
15 | }
16 |
17 | // Prefer local binary in repository to avoid network during tests
18 | try {
19 | const isWin = process.platform === 'win32';
20 | const binDir = path.resolve(__dirname, '..', 'bin');
21 | const candidate = path.join(binDir, isWin ? 'probe.exe' : 'probe-binary');
22 | if (fs.existsSync(candidate)) {
23 | process.env.PROBE_PATH = candidate;
24 | }
25 | } catch {}
26 |
27 | // Global test timeout (can be overridden per test)
28 | jest.setTimeout(10000);
29 |
// Silence console output while a test runs, unless TEST_VERBOSE is set.
// A snapshot of the console methods is taken once so they can be put back.
const originalConsole = { ...console };
const mutedMethods = ['log', 'error', 'warn'];
const isQuietRun = () => !process.env.TEST_VERBOSE;

beforeEach(() => {
  if (!isQuietRun()) return;
  // Replace each noisy method with a fresh jest mock.
  for (const method of mutedMethods) {
    console[method] = jest.fn();
  }
});

afterEach(() => {
  if (!isQuietRun()) return;
  // Put the snapshotted implementations back.
  for (const method of mutedMethods) {
    console[method] = originalConsole[method];
  }
});
47 |
--------------------------------------------------------------------------------
/npm/tests/unit/types-probe-agent-options.test.js:
--------------------------------------------------------------------------------
1 | import { describe, test, expect } from '@jest/globals';
2 | import ts from 'typescript';
3 |
/**
 * Regression test: ensure the public TypeScript surface exposes tool filtering
 * and system prompt options. We compile a tiny snippet and assert no diagnostics.
 *
 * NOTE(review): `ts.transpileModule` transpiles one file in isolation and, per the
 * TypeScript API docs, reports syntactic diagnostics only. It presumably does not
 * resolve `'../..'` or type-check `ProbeAgentOptions`; confirm this test covers
 * what it intends to.
 */
describe('Type definitions: ProbeAgentOptions', () => {
  // Transpile `source` and return whatever diagnostics were produced (or none).
  const compile = (source) => {
    const compilerOptions = {
      target: ts.ScriptTarget.ES2020,
      module: ts.ModuleKind.ESNext,
      moduleResolution: ts.ModuleResolutionKind.Node16,
      strict: true,
      skipLibCheck: true,
      isolatedModules: true,
      allowImportingTsExtensions: true,
      types: [],
    };
    const { diagnostics } = ts.transpileModule(source, { compilerOptions });
    return diagnostics || [];
  };

  test('accepts systemPrompt, allowedTools, and disableTools', () => {
    const snippet = `
import { ProbeAgent, type ProbeAgentOptions } from '../..';

const options: ProbeAgentOptions = {
systemPrompt: 'hello',
customPrompt: 'fallback',
allowedTools: ['search', '!bash'],
disableTools: false,
};

const agent = new ProbeAgent(options);
void agent;
`;

    const diagnostics = compile(snippet);

    expect(diagnostics.length).toBe(0);
  });
});
43 |
--------------------------------------------------------------------------------
/site/.vitepress/theme/index.js:
--------------------------------------------------------------------------------
1 | import { h } from 'vue'
2 | import DefaultTheme from 'vitepress/theme'
3 | import './custom.css'
4 | import './home.css'
5 | import './blog.css'
6 | import FeatureList from './components/FeatureList.vue'
7 | import CodeEditor from './components/CodeEditor.vue'
8 | import CommandExample from './components/CommandExample.vue'
9 | import BlogPostLayout from './components/BlogPostLayout.vue'
10 | import BlogLayout from './layouts/BlogLayout.vue'
11 | import FeatureSection from '../components/FeatureSection.vue'
12 | import SimpleFeatureSection from '../components/SimpleFeatureSection.vue'
13 | import StarsBackground from '../components/StarsBackground.vue'
14 | import HomeFeatures from '../components/HomeFeatures.vue'
15 |
// Theme definition: extends the default VitePress theme, injects FeatureList
// into the `home-features-after` layout slot, and registers the site's
// components globally.
export default {
  ...DefaultTheme,
  Layout() {
    const slots = {
      'home-features-after': () => h(FeatureList)
    };
    return h(DefaultTheme.Layout, null, slots);
  },
  enhanceApp({ app }) {
    // Register global components; each key is the name the component is registered under.
    const globalComponents = {
      FeatureList,
      CodeEditor,
      CommandExample,
      BlogPostLayout,
      BlogLayout,
      FeatureSection,
      SimpleFeatureSection,
      StarsBackground,
      HomeFeatures
    };

    for (const [name, component] of Object.entries(globalComponents)) {
      app.component(name, component);
    }
  }
}
36 |
--------------------------------------------------------------------------------
/npm/src/cli.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | /**
4 | * CLI wrapper for the probe binary
5 | *
6 | * This script ensures the probe binary is downloaded and then executes it with the provided arguments.
7 | * It's designed to be as lightweight as possible, essentially just passing through to the actual binary.
8 | */
9 |
10 | import { spawn } from 'child_process';
11 | import { getBinaryPath } from './utils.js';
12 |
/**
 * Main function
 *
 * Resolves the probe binary, runs it with the arguments given to this CLI,
 * and exits with the child's exit status. Exits with status 1 when the binary
 * cannot be resolved or started, or when the child is terminated by a signal.
 */
async function main() {
  try {
    // Get the path to the probe binary (this will download it if needed)
    const binaryPath = await getBinaryPath();

    // Get the arguments passed to the CLI
    const args = process.argv.slice(2);

    // Spawn the probe binary with the provided arguments
    const probeProcess = spawn(binaryPath, args, {
      stdio: 'inherit' // Share this process's stdin/stdout/stderr with the child
    });

    // Handle process exit
    probeProcess.on('close', (code) => {
      // `code` is null when the child was terminated by a signal. Passing null
      // to process.exit() would exit with status 0 and report the crash as
      // success, so map that case to a failure status.
      process.exit(code === null ? 1 : code);
    });

    // Handle process errors
    probeProcess.on('error', (error) => {
      console.error(`Error executing probe binary: ${error.message}`);
      process.exit(1);
    });
  } catch (error) {
    console.error(`Error: ${error.message}`);
    process.exit(1);
  }
}
44 |
45 | // Execute the main function
46 | main().catch(error => {
47 | console.error(`Unexpected error: ${error.message}`);
48 | process.exit(1);
49 | });
--------------------------------------------------------------------------------
/site/.vitepress/components/FullWidthFeatureSection.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
13 |
14 |
--------------------------------------------------------------------------------
/npm/src/agent/shared/Session.js:
--------------------------------------------------------------------------------
/**
 * Base Session class for AI provider engines.
 * Tracks a session id, an optional provider conversation id, and how many
 * messages have been counted so far.
 */
export class Session {
  /**
   * @param {*} id - Identifier for this session
   * @param {boolean} [debug=false] - When true, conversation id changes are logged
   */
  constructor(id, debug = false) {
    Object.assign(this, {
      id,
      conversationId: null, // Provider-specific conversation/thread ID for resumption
      messageCount: 0,
      debug
    });
  }

  /**
   * Store the provider's conversation ID so the session can be resumed later.
   * @param {string} conversationId - Provider's conversation/thread ID
   */
  setConversationId(conversationId) {
    this.conversationId = conversationId;
    if (!this.debug) {
      return;
    }
    console.log(`[Session ${this.id}] Conversation ID: ${conversationId}`);
  }

  /**
   * Count one more message.
   */
  incrementMessageCount() {
    ++this.messageCount;
  }

  /**
   * Snapshot of the session as a plain object.
   * @returns {Object} id, conversationId and messageCount
   */
  getInfo() {
    const { id, conversationId, messageCount } = this;
    return { id, conversationId, messageCount };
  }

  /**
   * CLI arguments for resuming the conversation (used by Claude Code).
   * Empty unless a conversation ID is set and at least one message was counted.
   * @returns {Array} CLI arguments for resuming conversation
   */
  getResumeArgs() {
    const resumable = Boolean(this.conversationId) && this.messageCount > 0;
    return resumable ? ['--resume', this.conversationId] : [];
  }
}
54 |
--------------------------------------------------------------------------------
/examples/chat/test-direct-function.js:
--------------------------------------------------------------------------------
1 | // Test direct function call with telemetry
2 | import { TelemetryConfig } from './telemetry.js';
3 | import { trace } from '@opentelemetry/api';
4 |
5 | // Initialize telemetry first
6 | const telemetryConfig = new TelemetryConfig({
7 | enableFile: true,
8 | enableConsole: true,
9 | filePath: './direct-test-traces.jsonl'
10 | });
11 |
12 | telemetryConfig.initialize();
13 |
// Runs a small unit of work inside an active span named 'testFunction'.
// The span gets two attributes and a status; it is always ended, and any
// exception is recorded on the span before being rethrown.
function testFunction() {
  return trace.getTracer('direct-test').startActiveSpan('testFunction', (span) => {
    try {
      console.log('🔍 Inside test function with span');

      const attributes = {
        'test.name': 'direct-function-test',
        'test.timestamp': Date.now()
      };
      span.setAttributes(attributes);

      span.setStatus({ code: 1 }); // SUCCESS
      return 'Test completed successfully';
    } catch (error) {
      span.recordException(error);
      span.setStatus({ code: 2, message: error.message });
      throw error;
    } finally {
      span.end();
    }
  });
}
38 |
39 | // Test the function
40 | console.log('Testing direct function call with telemetry...');
41 | const result = testFunction();
42 | console.log('✅ Result:', result);
43 |
44 | // Wait and shutdown
45 | setTimeout(async () => {
46 | console.log('⏳ Shutting down telemetry...');
47 | await telemetryConfig.shutdown();
48 | console.log('🎉 Test completed!');
49 | }, 2000);
--------------------------------------------------------------------------------
/npm/src/agent/storage/InMemoryStorageAdapter.js:
--------------------------------------------------------------------------------
1 | import { StorageAdapter } from './StorageAdapter.js';
2 |
/**
 * Default in-memory storage adapter.
 * Keeps every session's messages and metadata in a Map held by this instance.
 */
export class InMemoryStorageAdapter extends StorageAdapter {
  constructor() {
    super();
    // sessionId -> {messages: [], metadata: {}}
    this.sessions = new Map();
  }

  /** Return the stored messages for a session, or an empty array if none exist. */
  async loadHistory(sessionId) {
    const entry = this.sessions.get(sessionId);
    if (!entry) {
      return [];
    }
    return entry.messages;
  }

  /** Append a message to a session, creating the session on first use. */
  async saveMessage(sessionId, message) {
    const isoNow = () => new Date().toISOString();

    if (!this.sessions.has(sessionId)) {
      const freshEntry = {
        messages: [],
        metadata: {
          createdAt: isoNow(),
          lastActivity: isoNow()
        }
      };
      this.sessions.set(sessionId, freshEntry);
    }

    const { messages, metadata } = this.sessions.get(sessionId);
    messages.push(message);
    metadata.lastActivity = isoNow();
  }

  /** Drop the session and everything stored for it. */
  async clearHistory(sessionId) {
    this.sessions.delete(sessionId);
  }

  /** Return the session's metadata object, or null when the session is unknown. */
  async getSessionMetadata(sessionId) {
    const entry = this.sessions.get(sessionId);
    if (!entry) {
      return null;
    }
    return entry.metadata;
  }

  /** Refresh lastActivity for an existing session; unknown sessions are ignored. */
  async updateSessionActivity(sessionId) {
    const entry = this.sessions.get(sessionId);
    if (!entry) {
      return;
    }
    entry.metadata.lastActivity = new Date().toISOString();
  }
}
50 |
--------------------------------------------------------------------------------
/npm/src/agent/mockProvider.js:
--------------------------------------------------------------------------------
/**
 * Mock AI provider for testing purposes.
 * Produces canned responses and never makes an API call.
 */

/**
 * Build a provider object whose `languageModel(name)` returns a mock model
 * exposing `modelId`, `provider`, `doGenerate` and `doStream`.
 */
export function createMockProvider() {
  // A new usage object is created for every response.
  const mockUsage = () => ({
    promptTokens: 10,
    completionTokens: 5,
    totalTokens: 15
  });

  const languageModel = (modelName) => {
    // Mock the doGenerate method used by Vercel AI SDK
    const doGenerate = async ({ messages, tools }) => {
      // Simulate processing time
      await new Promise((resolve) => {
        setTimeout(resolve, 10);
      });

      // Return a mock response
      return {
        text: 'This is a mock response for testing',
        toolCalls: [],
        usage: mockUsage()
      };
    };

    // Mock the doStream method for streaming responses
    const doStream = async function* ({ messages, tools }) {
      // Simulate streaming response: one text chunk, then the finish event
      yield { type: 'text-delta', textDelta: 'Mock streaming response' };
      yield { type: 'finish', usage: mockUsage() };
    };

    return {
      modelId: `mock-${modelName}`,
      provider: 'mock',
      doGenerate,
      doStream
    };
  };

  return { languageModel };
}
49 |
50 | export function createMockModel(modelName = 'mock-model') {
51 | const provider = createMockProvider();
52 | return provider.languageModel(modelName);
53 | }
--------------------------------------------------------------------------------
/npm/bin/binaries/README.md:
--------------------------------------------------------------------------------
1 | # Bundled Probe Binaries
2 |
3 | This directory contains pre-compiled probe binaries for all supported platforms, bundled with the npm package to enable offline installation.
4 |
5 | ## Expected Files
6 |
7 | The CI/CD pipeline should place the following compressed binaries here before publishing to npm:
8 |
9 | - `probe-v{VERSION}-x86_64-unknown-linux-musl.tar.gz` - Linux x64 (static)
10 | - `probe-v{VERSION}-aarch64-unknown-linux-musl.tar.gz` - Linux ARM64 (static)
11 | - `probe-v{VERSION}-x86_64-apple-darwin.tar.gz` - macOS Intel
12 | - `probe-v{VERSION}-aarch64-apple-darwin.tar.gz` - macOS Apple Silicon
13 | - `probe-v{VERSION}-x86_64-pc-windows-msvc.zip` - Windows x64
14 |
15 | ## File Size
16 |
17 | Each compressed binary is approximately 5MB, totaling ~25MB for all 5 platforms.
18 |
19 | ## Installation Flow
20 |
21 | 1. **Postinstall script** (`scripts/postinstall.js`) detects the current platform
22 | 2. **Extraction** (`src/extractor.js`) extracts the matching bundled binary
23 | 3. **Fallback**: If no bundled binary is found, downloads from GitHub releases
24 |
25 | ## CI Integration
26 |
27 | The release workflow (`.github/workflows/release.yml`) should:
28 |
29 | 1. Build binaries for all 5 platforms
30 | 2. Create compressed archives (`.tar.gz` or `.zip`)
31 | 3. Copy them to `npm/bin/binaries/` before running `npm publish`
32 |
33 | Example CI step:
34 | ```yaml
35 | - name: Copy binaries to npm package
36 | run: |
37 | mkdir -p npm/bin/binaries
38 | cp dist/probe-v$VERSION-*.tar.gz npm/bin/binaries/
39 | cp dist/probe-v$VERSION-*.zip npm/bin/binaries/
40 | ```
41 |
--------------------------------------------------------------------------------
/.github/workflows/README-docker.md:
--------------------------------------------------------------------------------
1 | # Docker CI/CD Setup
2 |
3 | This document describes the Docker CI/CD setup for the Probe project.
4 |
5 | ## Required Secrets
6 |
7 | The following secrets need to be configured in your GitHub repository settings:
8 |
9 | 1. **`DOCKER_HUB_TOKEN`** - Docker Hub access token for pushing images
10 | - Create at: https://hub.docker.com/settings/security
11 | - Required permissions: Read, Write, Delete
12 |
13 | ## Optional Variables
14 |
15 | The following variables can be configured in repository settings:
16 |
17 | 1. **`DOCKER_HUB_USERNAME`** - Docker Hub username (defaults to 'buger')
18 |
19 | ## Workflow Integration
20 |
21 | ### release.yml
22 | The Docker build and publish process is integrated into the main release workflow:
23 | - Triggers on version tags (v*)
24 | - Builds multi-platform images (linux/amd64, linux/arm64)
25 | - Publishes versioned images to Docker Hub
26 | - Updates Docker Hub descriptions
27 | - Tags: `X.Y.Z` and `latest`
28 |
29 | The `publish-docker-images` job runs after the binary releases are complete, ensuring all release artifacts are available.
30 |
31 | ## Image Naming
32 |
33 | - Probe CLI: `buger/probe`
34 | - Probe Chat: `buger/probe-chat`
35 |
36 | ## Testing Locally
37 |
38 | ```bash
39 | # Test the full release workflow (including Docker builds)
40 | act -j publish-docker-images --secret DOCKER_HUB_TOKEN=your_token -e <(echo '{"ref": "refs/tags/v1.0.0"}')
41 |
42 | # Test Docker builds locally
43 | docker build -t probe-test .
44 | docker build -t probe-chat-test -f examples/chat/Dockerfile examples/chat
45 |
46 | # Test multi-platform builds locally
47 | docker buildx build --platform linux/amd64,linux/arm64 -t probe-test .
48 | ```
--------------------------------------------------------------------------------
/src/search/timeout.rs:
--------------------------------------------------------------------------------
1 | use std::sync::atomic::{AtomicBool, Ordering};
2 | use std::sync::Arc;
3 | use std::thread;
4 | use std::time::Duration;
5 |
6 | /// Starts a timeout thread that will terminate the process if the timeout is reached.
7 | /// Returns the shared stop flag; storing `true` in it makes the timeout thread exit at its next poll.
8 | pub fn start_timeout_thread(timeout_seconds: u64) -> Arc<AtomicBool> {
9 | let should_stop = Arc::new(AtomicBool::new(false));
10 | let should_stop_clone = should_stop.clone();
11 |
12 | // For testing purposes, check if we're running in a test environment
13 | let is_test = std::env::var("RUST_TEST_THREADS").is_ok();
14 |
15 | // Use a shorter sleep interval for tests to make timeouts more reliable
16 | let sleep_interval = if is_test {
17 | Duration::from_millis(10) // 10ms for tests
18 | } else {
19 | Duration::from_secs(1) // 1 second for normal operation
20 | };
21 |
22 | thread::spawn(move || {
23 | let mut elapsed_time = Duration::from_secs(0);
24 | let timeout_duration = Duration::from_secs(timeout_seconds);
25 |
26 | while elapsed_time < timeout_duration {
27 | // Check if we should stop the timeout thread
28 | if should_stop_clone.load(Ordering::SeqCst) {
29 | return;
30 | }
31 |
32 | // Sleep for the interval
33 | thread::sleep(sleep_interval);
34 | elapsed_time += sleep_interval;
35 | }
36 |
37 | // Timeout reached, print a message and terminate the process
38 | eprintln!("Search operation timed out after {timeout_seconds} seconds");
39 | std::process::exit(1);
40 | });
41 |
42 | should_stop
43 | }
44 |
--------------------------------------------------------------------------------
/npm/src/agent/mcp/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * MCP (Model Context Protocol) integration for ProbeAgent
3 | *
4 | * This module provides:
5 | * - MCP client management for connecting to MCP servers
6 | * - XML/JSON hybrid tool interface
7 | * - Configuration management
8 | */
9 |
10 | // Re-export main classes and functions
11 | export { MCPClientManager, createMCPManager, createTransport } from './client.js';
12 | export {
13 | loadMCPConfiguration,
14 | loadMCPConfigurationFromPath,
15 | parseEnabledServers,
16 | createSampleConfig,
17 | saveConfig
18 | } from './config.js';
19 | export {
20 | MCPXmlBridge,
21 | mcpToolToXmlDefinition,
22 | parseXmlMcpToolCall,
23 | parseHybridXmlToolCall,
24 | createHybridSystemMessage
25 | } from './xmlBridge.js';
26 |
27 | // Import for default export
28 | import { MCPClientManager, createMCPManager, createTransport } from './client.js';
29 | import {
30 | loadMCPConfiguration,
31 | loadMCPConfigurationFromPath,
32 | parseEnabledServers,
33 | createSampleConfig,
34 | saveConfig
35 | } from './config.js';
36 | import {
37 | MCPXmlBridge,
38 | mcpToolToXmlDefinition,
39 | parseXmlMcpToolCall,
40 | parseHybridXmlToolCall,
41 | createHybridSystemMessage
42 | } from './xmlBridge.js';
43 |
44 | // Default export for convenience
45 | export default {
46 | // Client
47 | MCPClientManager,
48 | createMCPManager,
49 | createTransport,
50 |
51 | // Config
52 | loadMCPConfiguration,
53 | loadMCPConfigurationFromPath,
54 | parseEnabledServers,
55 | createSampleConfig,
56 | saveConfig,
57 |
58 | // XML Bridge
59 | MCPXmlBridge,
60 | mcpToolToXmlDefinition,
61 | parseXmlMcpToolCall,
62 | parseHybridXmlToolCall,
63 | createHybridSystemMessage
64 | };
--------------------------------------------------------------------------------
/examples/cache_demo.rs:
--------------------------------------------------------------------------------
1 | use probe_code::language::parser::parse_file_for_code_blocks;
2 | use std::collections::HashSet;
3 |
4 | fn main() {
5 | // Set up test content
6 | let content = r#"
7 | fn test_function() {
8 | // This is a comment
9 | let x = 42;
10 | println!("Hello, world!");
11 | }
12 |
13 | struct TestStruct {
14 | field1: i32,
15 | field2: String,
16 | }
17 | "#;
18 |
19 | // Create a set of line numbers to extract
20 | let mut line_numbers = HashSet::new();
21 | line_numbers.insert(3); // Comment line
22 | line_numbers.insert(4); // Code line
23 | line_numbers.insert(8); // Struct field line
24 |
25 | println!("First call (should be a cache miss):");
26 | let result1 = parse_file_for_code_blocks(content, "rs", &line_numbers, true, None).unwrap();
27 | println!("Found {} code blocks", result1.len());
28 |
29 | println!("\nSecond call (should be a cache hit):");
30 | let result2 = parse_file_for_code_blocks(content, "rs", &line_numbers, true, None).unwrap();
31 | println!("Found {} code blocks", result2.len());
32 |
33 | println!("\nThird call with different allow_tests flag (should be a cache miss):");
34 | let result3 = parse_file_for_code_blocks(content, "rs", &line_numbers, false, None).unwrap();
35 | println!("Found {} code blocks", result3.len());
36 |
37 | println!("\nFourth call with different content (should be a cache miss):");
38 | let content2 = r#"
39 | fn different_function() {
40 | // This is a different comment
41 | let y = 100;
42 | }
43 | "#;
44 | let result4 = parse_file_for_code_blocks(content2, "rs", &line_numbers, true, None).unwrap();
45 | println!("Found {} code blocks", result4.len());
46 | }
47 |
--------------------------------------------------------------------------------
/examples/reranker/test_bert_results.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | echo "🔍 REAL BERT RERANKER - QUALITY AND PERFORMANCE ANALYSIS"
4 | echo "========================================================"
5 | echo ""
6 | # Run from this script's own directory so ./target/release/benchmark resolves on any machine.
7 | cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
8 |
9 | echo "=== Performance Analysis ==="
10 | echo ""
11 |
12 | echo "📊 Small scale (10 docs):"
13 | ./target/release/benchmark --query "search algorithm" --num-docs 10 --iterations 3 --batch-size 5
14 |
15 | echo ""
16 | echo "📊 Medium scale (25 docs):"
17 | ./target/release/benchmark --query "async rust programming" --num-docs 25 --iterations 2 --batch-size 10
18 |
19 | echo ""
20 | echo "📊 Large scale (50 docs):"
21 | ./target/release/benchmark --query "machine learning optimization" --num-docs 50 --iterations 1 --batch-size 25
22 |
23 | echo ""
24 | echo "=== Comparison: Demo vs Real BERT ==="
25 | echo ""
26 |
27 | echo "🚀 Demo reranker (mock algorithm):"
28 | ./target/release/benchmark --demo --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25
29 |
30 | echo ""
31 | echo "🧠 Real BERT reranker:"
32 | ./target/release/benchmark --query "rust async programming" --num-docs 50 --iterations 2 --batch-size 25
33 |
34 | echo ""
35 | echo "========================================================"
36 | echo "✅ REAL BERT PERFORMANCE ANALYSIS COMPLETE"
37 | echo ""
38 | echo "KEY FINDINGS:"
39 | echo "• Real BERT: ~7-8 docs/second (semantic understanding)"
40 | echo "• Demo reranker: ~80,000+ docs/second (simple matching)"
41 | echo "• BERT model loading: ~0.04-0.06 seconds"
42 | echo "• Per-document processing: ~125-130ms"
43 | echo "• Memory usage: ~45MB model + runtime overhead"
44 | echo "========================================================"
--------------------------------------------------------------------------------
/examples/reranker/simple_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Minimal test using sentence-transformers which handles dependencies better
4 | """
5 |
6 | try:
7 | from sentence_transformers import CrossEncoder
8 | print("✓ sentence-transformers imported successfully")
9 | except ImportError:
10 | print("Installing sentence-transformers...")
11 | import subprocess
12 | subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"])
13 | from sentence_transformers import CrossEncoder
14 |
15 | # Test inputs
16 | queries = [
17 | "how does authentication work",
18 | "foobar random nonsense gibberish"
19 | ]
20 |
21 | document = """Authentication is the process of verifying the identity of a user, device, or system.
22 | In web applications, authentication typically involves checking credentials like usernames
23 | and passwords against a database."""
24 |
25 | # Load model
26 | print("Loading cross-encoder model...")
27 | model = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2', max_length=512)
28 | print("Model loaded!")
29 |
30 | # Score pairs
31 | print("\nScoring query-document pairs:")
32 | print("-" * 50)
33 |
34 | scores = []
35 | for query in queries:
36 | score = model.predict([(query, document)])[0]
37 | scores.append(score)
38 | print(f"Query: '{query}'")
39 | print(f"Score: {score:.6f}\n")
40 |
41 | # Compare
42 | print("Comparison:")
43 | print(f"Relevant query score: {scores[0]:.6f}")
44 | print(f"Nonsense query score: {scores[1]:.6f}")
45 | print(f"Difference: {scores[0] - scores[1]:.6f}")
46 |
47 | if scores[0] > scores[1] + 0.1:
48 | print("\n✓ Good: Relevant query scores higher")
49 | else:
50 | print("\n⚠ Poor discrimination between queries")
--------------------------------------------------------------------------------
/npm/src/agent/storage/StorageAdapter.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Base class for storage adapters
3 | * Implement this interface to provide custom storage backends for ProbeAgent history
4 | */
5 | export class StorageAdapter {
6 | /**
7 | * Load conversation history for a session
8 | * @param {string} sessionId - Session identifier
9 | * @returns {Promise<Array<Object>>} Array of message objects with {role, content, ...}
10 | */
11 | async loadHistory(sessionId) {
12 | throw new Error('StorageAdapter.loadHistory() must be implemented by subclass');
13 | }
14 |
15 | /**
16 | * Save a message to storage
17 | * @param {string} sessionId - Session identifier
18 | * @param {Object} message - Message object { role, content, ... }
19 | * @returns {Promise<void>}
20 | */
21 | async saveMessage(sessionId, message) {
22 | throw new Error('StorageAdapter.saveMessage() must be implemented by subclass');
23 | }
24 |
25 | /**
26 | * Clear history for a session
27 | * @param {string} sessionId - Session identifier
28 | * @returns {Promise<void>}
29 | */
30 | async clearHistory(sessionId) {
31 | throw new Error('StorageAdapter.clearHistory() must be implemented by subclass');
32 | }
33 |
34 | /**
35 | * Get session metadata (optional)
36 | * @param {string} sessionId - Session identifier
37 | * @returns {Promise