├── mise.toml
├── .editorconfig
├── src
    ├── main.ts
    ├── types
    │   ├── server.ts
    │   ├── database.ts
    │   ├── index.ts
    │   ├── tools.ts
    │   └── browser.ts
    ├── tools
    │   ├── findApis.ts
    │   ├── chatPerplexity.ts
    │   ├── getDocumentation.ts
    │   ├── checkDeprecatedCode.ts
    │   ├── extractUrlContent.ts
    │   └── search.ts
    ├── utils
    │   ├── logging.ts
    │   ├── db.ts
    │   ├── fetch.ts
    │   └── puppeteer-logic.ts
    ├── server
    │   ├── config.ts
    │   ├── __tests__
    │   │   └── toolHandlerSetup.test.ts
    │   ├── modules
    │   │   ├── DatabaseManager.ts
    │   │   ├── BrowserManager.ts
    │   │   └── SearchEngine.ts
    │   ├── toolHandlerSetup.ts
    │   └── PerplexityServer.ts
    ├── __tests__
    │   ├── unit
    │   │   ├── utils.test.ts
    │   │   ├── config.test.ts
    │   │   ├── schemas.test.ts
    │   │   ├── types.test.ts
    │   │   ├── extraction.test.ts
    │   │   ├── database.test.ts
    │   │   ├── puppeteer-logic.test.ts
    │   │   ├── db.test.ts
    │   │   ├── logging.test.ts
    │   │   └── tools.test.ts
    │   └── integration
    │   │   └── server.test.ts
    ├── login.ts
    └── schema
    │   └── toolSchemas.ts
├── biome.json
├── tsconfig.json
├── .gitignore
├── vitest.config.ts
├── package.json
└── README.md


/mise.toml:
--------------------------------------------------------------------------------
1 | [tools]
2 | node = "20"
3 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | indent_style = space
 6 | indent_size = 2
 7 | end_of_line = lf
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | 


--------------------------------------------------------------------------------
/src/main.ts:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | 
3 | import { PerplexityServer } from "./server/PerplexityServer.js";
4 | 
5 | // Create and start the server
6 | const server = new PerplexityServer();
7 | await server.run();
8 | 


--------------------------------------------------------------------------------
/src/types/server.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Server module and dependency injection type definitions
 3 |  */
 4 | import type { IBrowserManager } from "./browser.js";
 5 | import type { IDatabaseManager } from "./database.js";
 6 | import type { ISearchEngine } from "./tools.js";
 7 | 
 8 | // ─── SERVER DEPENDENCY INJECTION ──────────────────────────────────────
 9 | export interface ServerDependencies {
10 |   browserManager?: IBrowserManager;
11 |   searchEngine?: ISearchEngine;
12 |   databaseManager?: IDatabaseManager;
13 | }
14 | 


--------------------------------------------------------------------------------
/src/types/database.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Database and Chat related type definitions
 3 |  */
 4 | // ─── CHAT & DATABASE TYPES ────────────────────────────────────────────
 5 | export interface ChatMessage {
 6 |   role: "user" | "assistant";
 7 |   content: string;
 8 | }
 9 | 
10 | export interface ChatResult {
11 |   chat_id: string;
12 |   response: string;
13 | }
14 | 
15 | // ─── DATABASE MANAGER INTERFACE ───────────────────────────────────────
16 | export interface IDatabaseManager {
17 |   initialize(): void;
18 |   getChatHistory(chatId?: string): ChatMessage[];
19 |   saveChatMessage(chatId: string, role: "user" | "assistant", content: string): void;
20 |   close(): void;
21 |   isInitialized(): boolean;
22 | }
23 | 


--------------------------------------------------------------------------------
/biome.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
 3 |   "vcs": {
 4 |     "enabled": true,
 5 |     "clientKind": "git",
 6 |     "useIgnoreFile": true
 7 |   },
 8 |   "files": {
 9 |     "ignoreUnknown": false,
10 |     "ignore": ["build/", "node_modules/"]
11 |   },
12 |   "formatter": {
13 |     "enabled": true,
14 |     "indentStyle": "space",
15 |     "indentWidth": 2,
16 |     "lineWidth": 100
17 |   },
18 |   "organizeImports": {
19 |     "enabled": true
20 |   },
21 |   "linter": {
22 |     "enabled": true,
23 |     "rules": {
24 |       "recommended": true
25 |     }
26 |   },
27 |   "javascript": {
28 |     "formatter": {
29 |       "quoteStyle": "double"
30 |     }
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "ESNext",
 5 |     "moduleResolution": "node",
 6 |     "lib": ["ES2022"],
 7 |     "outDir": "build",
 8 |     "rootDir": "src",
 9 |     "strict": true,
10 |     "esModuleInterop": true,
11 |     "allowSyntheticDefaultImports": true,
12 |     "forceConsistentCasingInFileNames": true,
13 |     "skipLibCheck": true,
14 |     "noUncheckedIndexedAccess": true,
15 |     "noPropertyAccessFromIndexSignature": true,
16 |     "verbatimModuleSyntax": true,
17 |     "resolveJsonModule": true,
18 |     "types": ["vitest/globals", "node", "bun-types"]
19 |   },
20 |   "include": ["src/**/*"],
21 |   "exclude": ["docs/", "build/", "node_modules/"]
22 | }
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | node_modules/**
 3 | 
 4 | # Build files
 5 | build/
 6 | 
 7 | # Environment variables
 8 | .env
 9 | *.env
10 | 
11 | # IDE files
12 | .vscode/
13 | .idea/
14 | 
15 | # Logs
16 | logs/
17 | *.log
18 | 
19 | # Local database
20 | chat_history.db
21 | 
22 | # Lock file (User requested)
23 | package-lock.json
24 | 
25 | # Local Dev
26 | memory-bank/
27 | docs/zeugs/
28 | .repomix/
29 | .cursor/
30 | .cursor*
31 | .aider*
32 | # Coverage
33 | node_modules/
34 | coverage/*.css
35 | coverage/*.js
36 | coverage/*.png
37 | coverage/index.html
38 | coverage/lcov-report/
39 | coverage/utils/
40 | coverage/types/
41 | coverage/tools/
42 | coverage/server/
43 | coverage/schema/
44 | !coverage/lcov.info
45 | coverage/coverage-final.json
46 | 
47 | #bun
48 | bun.lockb
49 | bun.lock
50 | 


--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Main type definitions export file
 3 |  * Centralized exports from focused type modules
 4 |  */
 5 | 
 6 | // ─── BROWSER & PUPPETEER TYPES ────────────────────────────────────────
 7 | export type {
 8 |   BrowserConfig,
 9 |   RecoveryContext,
10 |   ErrorAnalysis,
11 |   PuppeteerContext,
12 |   IBrowserManager,
13 |   PageContentResult,
14 |   RecursiveFetchResult,
15 | } from "./browser.js";
16 | 
17 | // ─── DATABASE & CHAT TYPES ────────────────────────────────────────────
18 | export type {
19 |   ChatMessage,
20 |   ChatResult,
21 |   IDatabaseManager,
22 | } from "./database.js";
23 | 
24 | // ─── TOOL & SEARCH TYPES ──────────────────────────────────────────────
25 | export type {
26 |   ISearchEngine,
27 |   ToolHandler,
28 |   ToolHandlersRegistry,
29 |   ChatPerplexityArgs,
30 |   ExtractUrlContentArgs,
31 |   GetDocumentationArgs,
32 |   FindApisArgs,
33 |   CheckDeprecatedCodeArgs,
34 |   SearchArgs,
35 |   ToolArgs,
36 | } from "./tools.js";
37 | 
38 | // ─── SERVER TYPES ─────────────────────────────────────────────────────
39 | export type { ServerDependencies } from "./server.js";
40 | 


--------------------------------------------------------------------------------
/src/tools/findApis.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tool implementation for finding APIs
 3 |  */
 4 | 
 5 | import type { PuppeteerContext } from "../types/index.js";
 6 | 
 7 | /**
 8 |  * Handles API discovery and comparison
 9 |  */
10 | export default async function findApis(
11 |   args: { requirement: string; context?: string },
12 |   ctx: PuppeteerContext,
13 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
14 | ): Promise<string> {
15 |   const { requirement, context = "" } = args;
16 |   const prompt = `Find and evaluate APIs that could be used for: ${requirement}. ${
17 |     context ? `Context: ${context}` : ""
18 |   } For each API, provide:
19 | 1. Name and brief description
20 | 2. Key features and capabilities
21 | 3. Pricing model and rate limits
22 | 4. Authentication methods
23 | 5. Integration complexity
24 | 6. Documentation quality and examples
25 | 7. Community support and popularity
26 | 8. Any potential limitations or concerns
27 | 9. Code examples for basic usage
28 | 10. Comparison with similar APIs
29 | 11. SDK availability and language support`;
30 |   return await performSearch(prompt, ctx);
31 | }
32 | 


--------------------------------------------------------------------------------
/src/tools/chatPerplexity.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tool implementation for chat functionality with Perplexity
 3 |  */
 4 | 
 5 | import crypto from "node:crypto";
 6 | import type { ChatMessage, PuppeteerContext } from "../types/index.js";
 7 | 
 8 | /**
 9 |  * Handles chat interactions with conversation history
10 |  */
11 | export default async function chatPerplexity(
12 |   args: { message: string; chat_id?: string },
13 |   ctx: PuppeteerContext,
14 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
15 |   getChatHistory: (chat_id: string) => ChatMessage[],
16 |   saveChatMessage: (chat_id: string, message: ChatMessage) => void,
17 | ): Promise<string> {
18 |   const { message, chat_id = crypto.randomUUID() } = args;
19 |   const history = getChatHistory(chat_id);
20 |   const userMessage: ChatMessage = { role: "user", content: message };
21 |   saveChatMessage(chat_id, userMessage);
22 | 
23 |   let conversationPrompt = "";
24 |   for (const msg of history) {
25 |     conversationPrompt +=
26 |       msg.role === "user" ? `User: ${msg.content}\n` : `Assistant: ${msg.content}\n`;
27 |   }
28 |   conversationPrompt += `User: ${message}\n`;
29 | 
30 |   return await performSearch(conversationPrompt, ctx);
31 | }
32 | 


--------------------------------------------------------------------------------
/vitest.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "vitest/config";
 2 | 
 3 | export default defineConfig({
 4 |   test: {
 5 |     globals: true,
 6 |     environment: "node",
 7 |     root: "./",
 8 |     // Only include tests from src directory, not compiled build directory
 9 |     include: ["src/**/*.{test,spec}.{js,ts}"],
10 |     exclude: [
11 |       "build/**/*", // Explicitly exclude all build directory files
12 |       "node_modules/**/*",
13 |       "docs/**/*",
14 |       "scripts/**/*",
15 |     ],
16 |     coverage: {
17 |       provider: "v8",
18 |       reportsDirectory: "coverage",
19 |       reporter: ["text", "lcov", "html", "json"],
20 |       include: ["src/**/*.ts"],
21 |       exclude: [
22 |         "build",
23 |         "scripts",
24 |         "docs",
25 |         "**/*.d.ts",
26 |         "**/node_modules/**",
27 |         "**/vitest.config.ts",
28 |         "**/*.test.ts",
29 |         "**/*.spec.ts",
30 |         "**/__tests__/**",
31 |         "src/main.ts", // Entry point, not core logic
32 |       ],
33 |       thresholds: {
34 |         statements: 1.8,
35 |         branches: 1.8,
36 |         functions: 1.8,
37 |         lines: 1.8,
38 |       },
39 |     },
40 |     testTimeout: 10000, // 10 seconds for integration tests
41 |     hookTimeout: 10000,
42 |   },
43 | });
44 | 


--------------------------------------------------------------------------------
/src/tools/getDocumentation.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tool implementation for documentation retrieval
 3 |  */
 4 | 
 5 | import type { PuppeteerContext } from "../types/index.js";
 6 | 
 7 | /**
 8 |  * Handles documentation fetching and formatting
 9 |  */
10 | export default async function getDocumentation(
11 |   args: { query: string; context?: string },
12 |   ctx: PuppeteerContext,
13 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
14 | ): Promise<string> {
15 |   const { query, context = "" } = args;
16 |   const prompt = `Provide comprehensive documentation and usage examples for ${query}. ${
17 |     context ? `Focus on: ${context}` : ""
18 |   } Include:
19 | 1. Basic overview and purpose
20 | 2. Key features and capabilities
21 | 3. Installation/setup if applicable
22 | 4. Common usage examples with code snippets
23 | 5. Best practices and performance considerations
24 | 6. Common pitfalls to avoid
25 | 7. Version compatibility information
26 | 8. Links to official documentation
27 | 9. Community resources (forums, chat channels)
28 | 10. Related tools/libraries that work well with it
29 | 
30 | Crucially, also provide the main official URL(s) for this documentation on separate lines, prefixed with 'Official URL(s):'.`;
31 |   return await performSearch(prompt, ctx);
32 | }
33 | 


--------------------------------------------------------------------------------
/src/tools/checkDeprecatedCode.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tool handler for 'check_deprecated_code'.
 3 |  * Analyzes code for deprecated features or patterns and suggests replacements, using the Perplexity search logic.
 4 |  * @param args - { code: string; technology?: string }
 5 |  * @param ctx - PuppeteerContext for browser operations
 6 |  * @param performSearch - Function to perform the search (prompt: string, ctx: PuppeteerContext) => Promise<string>
 7 |  * @returns The deprecation analysis string result
 8 |  */
 9 | import type { PuppeteerContext } from "../types/index.js";
10 | 
11 | export default async function checkDeprecatedCode(
12 |   args: { code: string; technology?: string },
13 |   ctx: PuppeteerContext,
14 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
15 | ): Promise<string> {
16 |   const { code, technology = "" } = args;
17 |   const prompt = `Analyze this code for deprecated features or patterns${
18 |     technology ? ` in ${technology}` : ""
19 |   }:
20 | 
21 | ${code}
22 | 
23 | Please provide:
24 | 1. Identification of deprecated features/methods
25 | 2. Current recommended alternatives
26 | 3. Step-by-step migration guide
27 | 4. Impact assessment of the changes
28 | 5. Deprecation timeline if available
29 | 6. Code examples before/after updating
30 | 7. Performance implications
31 | 8. Backward compatibility considerations
32 | 9. Testing recommendations for the changes`;
33 |   return await performSearch(prompt, ctx);
34 | }
35 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "perplexity-mcp-zerver",
 3 |   "version": "0.3.1",
 4 |   "description": "MCP server using Puppeteer to interact with Perplexity.ai without an API key.",
 5 |   "main": "build/main.js",
 6 |   "scripts": {
 7 |     "build": "tsc",
 8 |     "start": "bun build/main.js",
 9 |     "login": "bun build/login.js",
10 |     "test": "vitest",
11 |     "test:run": "vitest run",
12 |     "test:coverage": "vitest run --coverage",
13 |     "test:watch": "vitest --watch",
14 |     "lint": "biome lint . --write",
15 |     "format": "biome format . --write",
16 |     "dev": "bun run build && bun run start"
17 |   },
18 |   "keywords": [
19 |     "mcp",
20 |     "perplexity",
21 |     "puppeteer",
22 |     "ai",
23 |     "research"
24 |   ],
25 |   "author": "sm-moshi",
26 |   "license": "GPL-3.0-or-later",
27 |   "type": "module",
28 |   "dependencies": {
29 |     "@modelcontextprotocol/sdk": "1.12.3",
30 |     "@mozilla/readability": "0.6.0",
31 |     "@types/axios": "0.9.36",
32 |     "@types/jsdom": "21.1.7",
33 |     "@types/mozilla__readability": "0.4.2",
34 |     "axios": "1.10.0",
35 |     "jsdom": "26.1.0",
36 |     "minimist": "1.2.8",
37 |     "prune": "0.0.2",
38 |     "puppeteer": "24.10.1",
39 |     "zod": "3.25.67"
40 |   },
41 |   "devDependencies": {
42 |     "@biomejs/biome": "1.9.4",
43 |     "@types/minimist": "1.2.5",
44 |     "@types/node": "22.15.30",
45 |     "@vitest/coverage-v8": "3.2.2",
46 |     "bun-types": "latest",
47 |     "typescript": "5.8.3",
48 |     "vitest": "3.2.2"
49 |   }
50 | }


--------------------------------------------------------------------------------
/src/utils/logging.ts:
--------------------------------------------------------------------------------
 1 | export type LogLevel = "info" | "warn" | "error";
 2 | 
 3 | /**
 4 |  * Modular logging utility for MCP servers.
 5 |  * - All logs (info, warn, error) are written to stderr (console.error) to avoid corrupting MCP JSON protocol on stdout.
 6 |  * - Supports log levels, timestamps, and optional metadata.
 7 |  * - Used everywhere in the codebase for consistency.
 8 |  * - Easily extensible for future needs (e.g., file/remote logging).
 9 |  */
10 | export function log(level: LogLevel, message: string, meta?: Record<string, unknown>): void {
11 |   // Only log errors and critical info to reduce noise
12 |   if (level === "warn" && !message.includes("CAPTCHA") && !message.includes("failed")) {
13 |     return; // Skip most warnings
14 |   }
15 |   
16 |   const timestamp = new Date().toISOString();
17 |   // Always use console.error for all log levels to keep stdout clean for MCP protocol
18 |   if (meta && Object.keys(meta).length > 0) {
19 |     // eslint-disable-next-line no-console
20 |     console.error(`[${timestamp}] [${level.toUpperCase()}] ${message}`, meta);
21 |   } else {
22 |     // eslint-disable-next-line no-console
23 |     console.error(`[${timestamp}] [${level.toUpperCase()}] ${message}`);
24 |   }
25 | }
26 | 
/** Shorthand for `log("info", msg, meta)`. */
export function logInfo(msg: string, meta?: Record<string, unknown>): void {
  log("info", msg, meta);
}

/** Shorthand for `log("warn", msg, meta)`; subject to the same warning filter as `log`. */
export function logWarn(msg: string, meta?: Record<string, unknown>): void {
  log("warn", msg, meta);
}

/** Shorthand for `log("error", msg, meta)`. */
export function logError(msg: string, meta?: Record<string, unknown>): void {
  log("error", msg, meta);
}
30 | 


--------------------------------------------------------------------------------
/src/server/config.ts:
--------------------------------------------------------------------------------
 1 | import { homedir } from "node:os";
 2 | import { join } from "node:path";
 3 | 
 4 | export const CONFIG = {
 5 |   // Browser profile settings for Pro account persistence
 6 |   BROWSER_DATA_DIR: process.env["PERPLEXITY_BROWSER_DATA_DIR"] || join(homedir(), ".perplexity-mcp"),
 7 |   USE_PERSISTENT_PROFILE: process.env["PERPLEXITY_PERSISTENT_PROFILE"] !== "false",
 8 | 
 9 |   SEARCH_COOLDOWN: 5000, // Restored from backup.ts for better Cloudflare handling
10 |   PAGE_TIMEOUT: 180000, // Restored from backup.ts (3 minutes) for Cloudflare challenges
11 |   SELECTOR_TIMEOUT: 90000, // Restored from backup.ts (1.5 minutes) for slow loading
12 |   MAX_RETRIES: 10, // Restored from backup.ts for better resilience
13 |   MCP_TIMEOUT_BUFFER: 60000, // Restored from backup.ts
14 |   ANSWER_WAIT_TIMEOUT: 120000, // Restored from backup.ts (2 minutes)
15 |   RECOVERY_WAIT_TIME: 15000, // Restored from backup.ts
16 |   USER_AGENT:
17 |     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
18 |   TIMEOUT_PROFILES: {
19 |     navigation: 45000, // Restored from backup.ts for Cloudflare navigation
20 |     selector: 15000, // Restored from backup.ts
21 |     content: 120000, // Restored from backup.ts (2 minutes)
22 |     recovery: 30000, // Restored from backup.ts
23 |   },
24 |   DEBUG: {
25 |     CAPTURE_SCREENSHOTS: true, // Enable/disable debug screenshots
26 |     MAX_SCREENSHOTS: 5, // Maximum number of screenshots to keep
27 |     SCREENSHOT_ON_RECOVERY_SUCCESS: false, // Don't screenshot successful recoveries
28 |   },
29 | } as const;
30 | 
31 | 


--------------------------------------------------------------------------------
/src/types/tools.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Tool arguments and results type definitions
 3 |  */
 4 | 
 5 | // ─── SEARCH ENGINE INTERFACE ──────────────────────────────────────────
 6 | export interface ISearchEngine {
 7 |   performSearch(query: string): Promise<string>;
 8 | }
 9 | 
10 | // ─── TOOL HANDLER TYPES ───────────────────────────────────────────────
11 | export type ToolHandler = (args: Record<string, unknown>) => Promise<string>;
12 | 
13 | export interface ToolHandlersRegistry {
14 |   test_tool?: ToolHandler;
15 |   existing_tool?: ToolHandler;
16 |   failing_tool?: ToolHandler;
17 |   timeout_tool?: ToolHandler;
18 |   chat_perplexity?: ToolHandler;
19 |   get_documentation?: ToolHandler;
20 |   find_apis?: ToolHandler;
21 |   check_deprecated_code?: ToolHandler;
22 |   search?: ToolHandler;
23 |   extract_url_content?: ToolHandler;
24 |   // Allow additional tools via index signature
25 |   [key: string]: ToolHandler | undefined;
26 | }
27 | 
28 | // ─── TOOL ARGUMENT TYPES ──────────────────────────────────────────────
29 | export interface ChatPerplexityArgs {
30 |   message: string;
31 |   chat_id?: string;
32 | }
33 | 
34 | export interface ExtractUrlContentArgs {
35 |   url: string;
36 |   depth?: number;
37 | }
38 | 
39 | export interface GetDocumentationArgs {
40 |   query: string;
41 |   context?: string;
42 | }
43 | 
44 | export interface FindApisArgs {
45 |   requirement: string;
46 |   context?: string;
47 | }
48 | 
49 | export interface CheckDeprecatedCodeArgs {
50 |   code: string;
51 |   technology?: string;
52 | }
53 | 
54 | export interface SearchArgs {
55 |   query: string;
56 |   detail_level?: "brief" | "normal" | "detailed";
57 | }
58 | 
59 | // ─── UNION TYPES ──────────────────────────────────────────────────────
60 | export type ToolArgs =
61 |   | ChatPerplexityArgs
62 |   | ExtractUrlContentArgs
63 |   | GetDocumentationArgs
64 |   | FindApisArgs
65 |   | CheckDeprecatedCodeArgs
66 |   | SearchArgs;
67 | 


--------------------------------------------------------------------------------
/src/utils/db.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Database utility functions for chat message storage and retrieval
 3 |  */
 4 | 
 5 | import type { Database } from "bun:sqlite";
 6 | import type { ChatMessage } from "../types/index.js";
 7 | 
 8 | /**
 9 |  * Initializes the SQLite database schema for chat storage
10 |  */
11 | export function initializeDatabase(db: Database): void {
12 |   db.exec(`
13 |     CREATE TABLE IF NOT EXISTS chats (
14 |       id TEXT PRIMARY KEY,
15 |       created_at DATETIME DEFAULT CURRENT_TIMESTAMP
16 |     )
17 |   `);
18 |   db.exec(`
19 |     CREATE TABLE IF NOT EXISTS messages (
20 |       id INTEGER PRIMARY KEY AUTOINCREMENT,
21 |       chat_id TEXT NOT NULL,
22 |       role TEXT NOT NULL,
23 |       content TEXT NOT NULL,
24 |       created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
25 |       FOREIGN KEY (chat_id) REFERENCES chats(id)
26 |     )
27 |   `);
28 | }
29 | 
/**
 * Retrieves the chat history for a given chat ID, oldest message first.
 *
 * Rows are ordered by `created_at` and then by `id`. `created_at` is filled from
 * CURRENT_TIMESTAMP, which SQLite stores with one-second resolution, so messages written
 * within the same second tie on it; the AUTOINCREMENT `id` breaks such ties in insertion
 * order and keeps the conversation sequence deterministic.
 *
 * @param db The bun:sqlite Database instance.
 * @param chatId The chat session ID.
 * @returns An array of chat messages (`role` and `content` only); empty when the chat has
 *   no stored messages.
 */
export function getChatHistory(db: Database, chatId: string): ChatMessage[] {
  const rows = db
    .query(
      "SELECT role, content FROM messages WHERE chat_id = ? ORDER BY created_at ASC, id ASC",
    )
    .all(chatId);
  // The query selects exactly the two ChatMessage columns, so the rows match that shape.
  return rows as ChatMessage[];
}
42 | 
/**
 * Stores one chat message, creating the parent chat row first when it is missing.
 *
 * The chat row is inserted with INSERT OR IGNORE, so an already-known chat ID is left as is
 * before the message row is added.
 *
 * @param db The bun:sqlite Database instance.
 * @param chatId The chat session ID.
 * @param message The chat message to save; its `role` and `content` are written.
 */
export function saveChatMessage(db: Database, chatId: string, message: ChatMessage) {
  const ensureChatRow = db.prepare("INSERT OR IGNORE INTO chats (id) VALUES (?)");
  ensureChatRow.run(chatId);

  const insertMessageRow = db.prepare(
    "INSERT INTO messages (chat_id, role, content) VALUES (?, ?, ?)",
  );
  const { role, content } = message;
  insertMessageRow.run(chatId, role, content);
}
57 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/utils.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from "vitest";
 2 | 
 3 | describe("Utility Functions", () => {
 4 |   describe("URL validation and parsing", () => {
 5 |     it("should validate basic URLs", () => {
 6 |       const validUrls = ["https://example.com", "http://test.org", "https://github.com/user/repo"];
 7 | 
 8 |       for (const url of validUrls) {
 9 |         expect(() => new URL(url)).not.toThrow();
10 |       }
11 |     });
12 | 
13 |     it("should identify GitHub repository URLs", () => {
14 |       const githubRepoPattern = /^https:\/\/github\.com\/[^\/]+\/[^\/]+\/?$/;
15 | 
16 |       const testCases = [
17 |         { url: "https://github.com/microsoft/vscode", isRepo: true },
18 |         { url: "https://github.com/facebook/react", isRepo: true },
19 |         { url: "https://github.com/microsoft", isRepo: false }, // user/org page
20 |         { url: "https://github.com/microsoft/vscode/issues", isRepo: false }, // sub-page
21 |         { url: "https://example.com", isRepo: false },
22 |       ];
23 | 
24 |       for (const testCase of testCases) {
25 |         const isMatch = githubRepoPattern.test(testCase.url);
26 |         expect(isMatch).toBe(testCase.isRepo);
27 |       }
28 |     });
29 |   });
30 | 
31 |   describe("Content validation", () => {
32 |     it("should identify HTML content types", () => {
33 |       const htmlContentTypes = ["text/html", "text/html; charset=utf-8", "application/xhtml+xml"];
34 | 
35 |       // Simple check for HTML content types
36 |       expect(htmlContentTypes.length).toBeGreaterThan(0);
37 |     });
38 | 
39 |     it("should validate content length", () => {
40 |       const minLength = 100;
41 |       const validContent = "a".repeat(minLength + 10);
42 |       const invalidContent = "a".repeat(minLength - 10);
43 | 
44 |       expect(validContent.length).toBeGreaterThan(minLength);
45 |       expect(invalidContent.length).toBeLessThan(minLength);
46 |     });
47 |   });
48 | 
49 |   describe("Parameter validation", () => {
50 |     it("should validate depth parameters", () => {
51 |       const validDepths = [1, 2, 3, 4, 5];
52 |       const invalidDepths = [-1, 0, 6, 10];
53 | 
54 |       expect(validDepths.every((d) => d >= 1 && d <= 5)).toBe(true);
55 |       expect(invalidDepths.some((d) => d < 1 || d > 5)).toBe(true);
56 |     });
57 | 
58 |     it("should validate boolean parameters", () => {
59 |       const truthyValues = [true, "true", 1, "1"];
60 |       const falsyValues = [false, "false", 0, "0", null, undefined];
61 | 
62 |       // Basic boolean validation
63 |       expect(truthyValues.length).toBeGreaterThan(0);
64 |       expect(falsyValues.length).toBeGreaterThan(0);
65 |     });
66 |   });
67 | });
68 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/config.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from "vitest";
 2 | import { CONFIG } from "../../server/config.js";
 3 | 
 4 | describe("Configuration", () => {
 5 |   describe("Timeout Values", () => {
 6 |     it("should have consistent timeout values", () => {
 7 |       expect(CONFIG.PAGE_TIMEOUT).toBeGreaterThan(0);
 8 |       expect(CONFIG.SELECTOR_TIMEOUT).toBeGreaterThan(0);
 9 |       expect(CONFIG.ANSWER_WAIT_TIMEOUT).toBeGreaterThan(0);
10 |       expect(CONFIG.MCP_TIMEOUT_BUFFER).toBeGreaterThan(0);
11 |     });
12 | 
13 |     it("should have reasonable timeout relationships", () => {
14 |       // Page timeout should be greater than selector timeout
15 |       expect(CONFIG.PAGE_TIMEOUT).toBeGreaterThan(CONFIG.SELECTOR_TIMEOUT);
16 | 
17 |       // Answer wait timeout should be substantial for content loading
18 |       expect(CONFIG.ANSWER_WAIT_TIMEOUT).toBeGreaterThan(30000);
19 |     });
20 |   });
21 | 
22 |   describe("User Agent", () => {
23 |     it("should have valid user agent string", () => {
24 |       expect(typeof CONFIG.USER_AGENT).toBe("string");
25 |       expect(CONFIG.USER_AGENT.length).toBeGreaterThan(0);
26 |       expect(CONFIG.USER_AGENT).toContain("Mozilla");
27 |       expect(CONFIG.USER_AGENT).toContain("Chrome");
28 |     });
29 |   });
30 | 
31 |   describe("Retry Configuration", () => {
32 |     it("should have reasonable retry limits", () => {
33 |       expect(CONFIG.MAX_RETRIES).toBeGreaterThan(0);
34 |       expect(CONFIG.MAX_RETRIES).toBeLessThan(20);
35 |     });
36 |   });
37 | 
38 |   describe("Timeout Profiles", () => {
39 |     it("should have valid timeout profiles", () => {
40 |       expect(CONFIG.TIMEOUT_PROFILES).toBeDefined();
41 |       expect(CONFIG.TIMEOUT_PROFILES.navigation).toBeGreaterThan(0);
42 |       expect(CONFIG.TIMEOUT_PROFILES.selector).toBeGreaterThan(0);
43 |       expect(CONFIG.TIMEOUT_PROFILES.content).toBeGreaterThan(0);
44 |       expect(CONFIG.TIMEOUT_PROFILES.recovery).toBeGreaterThan(0);
45 |     });
46 | 
47 |     it("should have consistent timeout profile relationships", () => {
48 |       // Navigation timeout should be substantial
49 |       expect(CONFIG.TIMEOUT_PROFILES.navigation).toBeGreaterThan(30000);
50 | 
51 |       // Content timeout should be the longest
52 |       expect(CONFIG.TIMEOUT_PROFILES.content).toBeGreaterThan(CONFIG.TIMEOUT_PROFILES.navigation);
53 |     });
54 |   });
55 | 
56 |   describe("Debug Configuration", () => {
57 |     it("should have valid debug settings", () => {
58 |       expect(typeof CONFIG.DEBUG.CAPTURE_SCREENSHOTS).toBe("boolean");
59 |       expect(typeof CONFIG.DEBUG.SCREENSHOT_ON_RECOVERY_SUCCESS).toBe("boolean");
60 |       expect(CONFIG.DEBUG.MAX_SCREENSHOTS).toBeGreaterThan(0);
61 |     });
62 |   });
63 | });
64 | 


--------------------------------------------------------------------------------
/src/server/__tests__/toolHandlerSetup.test.ts:
--------------------------------------------------------------------------------
 1 | import { beforeEach, describe, expect, it, vi } from "vitest";
 2 | import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
 3 | import { setupToolHandlers, createToolHandlersRegistry } from "../toolHandlerSetup.js";
 4 | import type { ToolHandlersRegistry } from "../../types/index.js";
 5 | 
 6 | describe("Tool Handler Setup", () => {
 7 |   let mockServer: any;
 8 |   let mockToolHandlers: ToolHandlersRegistry;
 9 | 
10 |   beforeEach(() => {
11 |     // Mock Server
12 |     mockServer = {
13 |       setRequestHandler: vi.fn(),
14 |     };
15 | 
16 |     // Mock Tool Handlers
17 |     mockToolHandlers = {
18 |       chatPerplexity: vi.fn().mockResolvedValue("chat response"),
19 |       search: vi.fn().mockResolvedValue("search response"),
20 |       extractUrlContent: vi.fn().mockResolvedValue("extract response"),
21 |       getDocumentation: vi.fn().mockResolvedValue("doc response"),
22 |       findApis: vi.fn().mockResolvedValue("api response"),
23 |       checkDeprecatedCode: vi.fn().mockResolvedValue("deprecated response"),
24 |     } as ToolHandlersRegistry;
25 |   });
26 | 
27 |   describe("setupToolHandlers", () => {
28 |     it("should register ListTools handler", () => {
29 |       setupToolHandlers(mockServer, mockToolHandlers);
30 | 
31 |       expect(mockServer.setRequestHandler).toHaveBeenCalledWith(
32 |         ListToolsRequestSchema,
33 |         expect.any(Function),
34 |       );
35 |     });
36 | 
37 |     it("should register CallTool handler", () => {
38 |       setupToolHandlers(mockServer, mockToolHandlers);
39 | 
40 |       expect(mockServer.setRequestHandler).toHaveBeenCalledWith(
41 |         CallToolRequestSchema,
42 |         expect.any(Function),
43 |       );
44 |     });
45 | 
46 |     it("should call the appropriate tool handler for known tools", async () => {
47 |       setupToolHandlers(mockServer, mockToolHandlers);
48 | 
49 |       // Get the CallTool handler function (second call)
50 |       const callToolHandler = mockServer.setRequestHandler.mock.calls[1][1];
51 | 
52 |       const mockRequest = {
53 |         params: {
54 |           name: "chatPerplexity",
55 |           arguments: { message: "test" },
56 |         },
57 |       };
58 | 
59 |       const response = await callToolHandler(mockRequest);
60 |       expect(mockToolHandlers["chatPerplexity"]).toHaveBeenCalledWith({ message: "test" });
61 |       expect(response).toHaveProperty("content");
62 |     });
63 |   });
64 | 
65 |   describe("createToolHandlersRegistry", () => {
66 |     it("should create a tool handlers registry with provided handlers", () => {
67 |       const registry = createToolHandlersRegistry(mockToolHandlers);
68 | 
69 |       expect(registry).toBeDefined();
70 |       expect(registry["chatPerplexity"]).toBe(mockToolHandlers["chatPerplexity"]);
71 |       expect(registry["search"]).toBe(mockToolHandlers["search"]);
72 |       expect(registry["extractUrlContent"]).toBe(mockToolHandlers["extractUrlContent"]);
73 |       expect(registry["getDocumentation"]).toBe(mockToolHandlers["getDocumentation"]);
74 |       expect(registry["findApis"]).toBe(mockToolHandlers["findApis"]);
75 |       expect(registry["checkDeprecatedCode"]).toBe(mockToolHandlers["checkDeprecatedCode"]);
76 |     });
77 |   });
78 | });
79 | 


--------------------------------------------------------------------------------
/src/server/modules/DatabaseManager.ts:
--------------------------------------------------------------------------------
  1 | import { existsSync, mkdirSync } from "node:fs";
  2 | import { dirname, join } from "node:path";
  3 | import { fileURLToPath } from "node:url";
  4 | /**
  5 |  * DatabaseManager - Handles all database operations
  6 |  * Focused, testable module for SQLite database management
  7 |  */
  8 | import { Database } from "bun:sqlite";
  9 | import type { ChatMessage, IDatabaseManager } from "../../types/index.js";
 10 | import { getChatHistory, initializeDatabase, saveChatMessage } from "../../utils/db.js";
 11 | import { logError, logInfo } from "../../utils/logging.js";
 12 | 
 13 | export class DatabaseManager implements IDatabaseManager {
 14 |   private db: Database | null = null;
 15 |   private initialized = false;
 16 | 
 17 |   constructor(private readonly customDbPath?: string) {}
 18 | 
 19 |   initialize(): void {
 20 |     try {
 21 |       // Determine database path
 22 |       const dbPath =
 23 |         this.customDbPath ||
 24 |         join(dirname(fileURLToPath(import.meta.url)), "..", "..", "chat_history.db");
 25 | 
 26 |       const dbDir = dirname(dbPath);
 27 | 
 28 |       logInfo(`Initializing database at: ${dbPath}`);
 29 | 
 30 |       // Create directory if it doesn't exist
 31 |       if (!existsSync(dbDir)) {
 32 |         mkdirSync(dbDir, { recursive: true });
 33 |         logInfo(`Created database directory: ${dbDir}`);
 34 |       }
 35 | 
 36 |       // Initialize SQLite database
 37 |       this.db = new Database(dbPath, { create: true });
 38 | 
 39 |       // Run database initialization script
 40 |       initializeDatabase(this.db);
 41 | 
 42 |       this.initialized = true;
 43 |       logInfo("DatabaseManager initialized successfully");
 44 |     } catch (error) {
 45 |       logError("DatabaseManager initialization failed:", {
 46 |         error: error instanceof Error ? error.message : String(error),
 47 |         stack: error instanceof Error ? error.stack : undefined,
 48 |       });
 49 |       throw error;
 50 |     }
 51 |   }
 52 | 
 53 |   getChatHistory(chatId?: string): ChatMessage[] {
 54 |     if (!this.isInitialized()) {
 55 |       throw new Error("Database not initialized");
 56 |     }
 57 | 
 58 |     if (!chatId) {
 59 |       throw new Error("Chat ID is required");
 60 |     }
 61 | 
 62 |     try {
 63 |       return getChatHistory(this.db as Database, chatId);
 64 |     } catch (error) {
 65 |       logError("Failed to get chat history:", {
 66 |         error: error instanceof Error ? error.message : String(error),
 67 |         chatId,
 68 |       });
 69 |       throw error;
 70 |     }
 71 |   }
 72 | 
 73 |   saveChatMessage(chatId: string, role: "user" | "assistant", content: string): void {
 74 |     if (!this.isInitialized()) {
 75 |       throw new Error("Database not initialized");
 76 |     }
 77 | 
 78 |     try {
 79 |       const message: ChatMessage = { role, content };
 80 |       saveChatMessage(this.db as Database, chatId, message);
 81 |       logInfo(`Saved ${role} message for chat ${chatId}`);
 82 |     } catch (error) {
 83 |       logError("Failed to save chat message:", {
 84 |         error: error instanceof Error ? error.message : String(error),
 85 |         chatId,
 86 |         role,
 87 |       });
 88 |       throw error;
 89 |     }
 90 |   }
 91 | 
 92 |   close(): void {
 93 |     try {
 94 |       if (this.db) {
 95 |         this.db.close();
 96 |         this.db = null;
 97 |         this.initialized = false;
 98 |         logInfo("Database connection closed successfully");
 99 |       }
100 |     } catch (error) {
101 |       logError("Error closing database:", {
102 |         error: error instanceof Error ? error.message : String(error),
103 |       });
104 |     }
105 |   }
106 | 
107 |   isInitialized(): boolean {
108 |     return this.initialized && this.db !== null;
109 |   }
110 | 
111 |   // Getter for testing purposes
112 |   getDatabase(): Database | null {
113 |     return this.db;
114 |   }
115 | }
116 | 


--------------------------------------------------------------------------------
/src/server/toolHandlerSetup.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Tool Handler Setup Module
  3 |  * Manages MCP tool registration and request handling logic
  4 |  */
  5 | 
  6 | import crypto from "node:crypto";
  7 | import type { Server } from "@modelcontextprotocol/sdk/server/index.js";
  8 | import {
  9 |   CallToolRequestSchema,
 10 |   ErrorCode,
 11 |   ListToolsRequestSchema,
 12 |   McpError,
 13 | } from "@modelcontextprotocol/sdk/types.js";
 14 | 
 15 | import { TOOL_SCHEMAS } from "../schema/toolSchemas.js";
 16 | import type { ChatPerplexityArgs, ToolHandlersRegistry } from "../types/index.js";
 17 | import { logError, logWarn } from "../utils/logging.js";
 18 | 
 19 | /**
 20 |  * Sets up MCP tool handlers for the server
 21 |  * @param server - The MCP Server instance
 22 |  * @param toolHandlers - Registry of tool handler functions
 23 |  */
 24 | export function setupToolHandlers(server: Server, toolHandlers: ToolHandlersRegistry): void {
 25 |   // Register ListTools handler
 26 |   server.setRequestHandler(ListToolsRequestSchema, async () => {
 27 |     return {
 28 |       tools: TOOL_SCHEMAS,
 29 |     };
 30 |   });
 31 | 
 32 |   // Register CallTool handler with comprehensive error handling and timeout management
 33 |   server.setRequestHandler(CallToolRequestSchema, async (request) => {
 34 |     const { name, arguments: args } = request.params;
 35 | 
 36 |     // Set a timeout for the entire MCP request
 37 |     const requestTimeout = setTimeout(() => {
 38 |       logWarn("MCP request is taking too long, this might lead to a timeout");
 39 |     }, 60000); // 60 seconds warning
 40 | 
 41 |     try {
 42 |       if (toolHandlers[name]) {
 43 |         const result = await toolHandlers[name](args || {});
 44 | 
 45 |         // Special case for chat to return chat_id
 46 |         if (name === "chat_perplexity") {
 47 |           const chatArgs = (args || {}) as unknown as ChatPerplexityArgs;
 48 |           const chatId = chatArgs.chat_id || crypto.randomUUID();
 49 |           return {
 50 |             content: [
 51 |               {
 52 |                 type: "text",
 53 |                 text: JSON.stringify({ chat_id: chatId, response: result }, null, 2),
 54 |               },
 55 |             ],
 56 |           };
 57 |         }
 58 | 
 59 |         return { content: [{ type: "text", text: result }] };
 60 |       }
 61 |       throw new McpError(ErrorCode.MethodNotFound, `Tool ${name} not found`);
 62 |     } catch (error) {
 63 |       logError(`Error executing tool ${name}:`, {
 64 |         error: error instanceof Error ? error.message : String(error),
 65 |       });
 66 | 
 67 |       if (error instanceof Error) {
 68 |         const errorMsg = error.message;
 69 | 
 70 |         if (errorMsg.includes("timeout") || errorMsg.includes("Timed out")) {
 71 |           logError("Timeout detected in MCP request");
 72 |           return {
 73 |             content: [
 74 |               {
 75 |                 type: "text",
 76 |                 text: "The operation timed out. This might be due to high server load or network issues. Please try again with a more specific query.",
 77 |               },
 78 |             ],
 79 |           };
 80 |         }
 81 | 
 82 |         return {
 83 |           content: [
 84 |             {
 85 |               type: "text",
 86 |               text: `The operation encountered an error: ${errorMsg}. Please try again.`,
 87 |             },
 88 |           ],
 89 |         };
 90 |       }
 91 | 
 92 |       throw new McpError(
 93 |         ErrorCode.InternalError,
 94 |         `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`,
 95 |       );
 96 |     } finally {
 97 |       clearTimeout(requestTimeout);
 98 |     }
 99 |   });
100 | }
101 | 
/**
 * Identity factory for a tool handlers registry.
 *
 * Hands back the very object it receives (no copy, no validation); its only
 * effect is to have the argument checked against `ToolHandlersRegistry`.
 *
 * @param handlers - Object mapping tool names to handler functions
 * @returns The same `handlers` object
 */
export function createToolHandlersRegistry(handlers: ToolHandlersRegistry): ToolHandlersRegistry {
  const registry: ToolHandlersRegistry = handlers;
  return registry;
}
110 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/schemas.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it } from "vitest";
  2 | import { TOOL_SCHEMAS } from "../../schema/toolSchemas.js";
  3 | 
  4 | describe("Tool Schemas", () => {
  5 |   describe("Schema Structure Validation", () => {
  6 |     it("should have all required tools", () => {
  7 |       expect(TOOL_SCHEMAS).toHaveLength(6);
  8 | 
  9 |       const toolNames = TOOL_SCHEMAS.map((schema) => schema.name);
 10 |       expect(toolNames).toContain("chat_perplexity");
 11 |       expect(toolNames).toContain("search");
 12 |       expect(toolNames).toContain("extract_url_content");
 13 |       expect(toolNames).toContain("get_documentation");
 14 |       expect(toolNames).toContain("find_apis");
 15 |       expect(toolNames).toContain("check_deprecated_code");
 16 |     });
 17 | 
 18 |     it("should have valid schema structure for each tool", () => {
 19 |       TOOL_SCHEMAS.forEach((schema) => {
 20 |         // Basic required fields
 21 |         expect(schema.name).toBeDefined();
 22 |         expect(typeof schema.name).toBe("string");
 23 |         expect(schema.description).toBeDefined();
 24 |         expect(typeof schema.description).toBe("string");
 25 |         expect(schema.category).toBeDefined();
 26 |         expect(typeof schema.category).toBe("string");
 27 | 
 28 |         // Input schema
 29 |         expect(schema.inputSchema).toBeDefined();
 30 | 
 31 |         // Keywords and use cases
 32 |         expect(Array.isArray(schema.keywords)).toBe(true);
 33 |         expect(Array.isArray(schema.use_cases)).toBe(true);
 34 | 
 35 |         // Examples
 36 |         expect(Array.isArray(schema.examples)).toBe(true);
 37 |         expect(schema.examples.length).toBeGreaterThan(0);
 38 |       });
 39 |     });
 40 |   });
 41 | 
 42 |   describe("Required Field Definitions", () => {
 43 |     it("should have proper required field definitions in input schemas", () => {
 44 |       TOOL_SCHEMAS.forEach((schema) => {
 45 |         if (schema.inputSchema.required) {
 46 |           expect(Array.isArray(schema.inputSchema.required)).toBe(true);
 47 | 
 48 |           // Check that required fields are defined in properties
 49 |           schema.inputSchema.required.forEach((field: any) => {
 50 |             expect((schema.inputSchema.properties as any)[field]).toBeDefined();
 51 |           });
 52 |         }
 53 |       });
 54 |     });
 55 | 
 56 |     it("should have descriptive field definitions", () => {
 57 |       TOOL_SCHEMAS.forEach((schema) => {
 58 |         Object.keys(schema.inputSchema.properties).forEach((fieldName) => {
 59 |           const field: any = (schema.inputSchema.properties as any)[fieldName];
 60 |           expect(field.description).toBeDefined();
 61 |           expect(typeof field.description).toBe("string");
 62 |           expect(field.description.length).toBeGreaterThan(0);
 63 |         });
 64 |       });
 65 |     });
 66 |   });
 67 | 
 68 |   describe("Example Data Validity", () => {
 69 |     it("should have valid example data for all tools", () => {
 70 |       TOOL_SCHEMAS.forEach((schema) => {
 71 |         expect(schema.examples.length).toBeGreaterThan(0);
 72 | 
 73 |         schema.examples.forEach((example) => {
 74 |           expect(example.description).toBeDefined();
 75 |           expect(typeof example.description).toBe("string");
 76 |           expect(example.input).toBeDefined();
 77 |           expect(example.output).toBeDefined();
 78 | 
 79 |           // Check that required input fields are present in examples
 80 |           if (schema.inputSchema.required) {
 81 |             schema.inputSchema.required.forEach((requiredField: any) => {
 82 |               expect((example.input as any)[requiredField]).toBeDefined();
 83 |             });
 84 |           }
 85 |         });
 86 |       });
 87 |     });
 88 |   });
 89 | 
 90 |   describe("Schema Completeness", () => {
 91 |     it("should have comprehensive categories for all tools", () => {
 92 |       const categories = TOOL_SCHEMAS.map((schema) => schema.category);
 93 |       expect(categories).toContain("Conversation");
 94 |       expect(categories).toContain("Information Extraction");
 95 |       expect(categories).toContain("Technical Reference");
 96 |       expect(categories).toContain("API Discovery");
 97 |       expect(categories).toContain("Code Analysis");
 98 |       expect(categories).toContain("Web Search");
 99 |     });
100 | 
101 |     it("should have related tools references", () => {
102 |       TOOL_SCHEMAS.forEach((schema) => {
103 |         expect(Array.isArray(schema.related_tools)).toBe(true);
104 |       });
105 |     });
106 | 
107 |     it("should have proper schema structure", () => {
108 |       TOOL_SCHEMAS.forEach((schema) => {
109 |         // Check input schema only
110 |         expect(schema.inputSchema.type).toBe("object");
111 |         expect(schema.inputSchema.properties).toBeDefined();
112 |       });
113 |     });
114 |   });
115 | });
116 | 


--------------------------------------------------------------------------------
/src/types/browser.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Browser and Puppeteer related type definitions
  3 |  */
  4 | import type { Browser, Page } from "puppeteer";
  5 | 
  6 | // ─── GLOBAL BROWSER DECLARATIONS ─────────────────────────────────────────────
/**
 * Global augmentation: adds a `chrome` member (with `app` and `runtime` sub-objects)
 * to the DOM `Window` interface so code that reads or assigns `window.chrome` type-checks.
 * NOTE(review): presumably used by page-context scripts that stub `window.chrome` — confirm.
 */
declare global {
  interface Window {
    chrome: {
      // Shape of `chrome.app`: two string-valued constant tables plus state accessors.
      app: {
        InstallState: {
          DISABLED: string;
          INSTALLED: string;
          NOT_INSTALLED: string;
        };
        RunningState: {
          CANNOT_RUN: string;
          READY_TO_RUN: string;
          RUNNING: string;
        };
        getDetails: () => void;
        getIsInstalled: () => void;
        installState: () => void;
        isInstalled: boolean;
        runningState: () => void;
      };
      // Shape of `chrome.runtime`: string-valued constant tables and a `connect` factory.
      runtime: {
        OnInstalledReason: {
          CHROME_UPDATE: string;
          INSTALL: string;
          SHARED_MODULE_UPDATE: string;
          UPDATE: string;
        };
        PlatformArch: {
          ARM: string;
          ARM64: string;
          MIPS: string;
          MIPS64: string;
          X86_32: string;
          X86_64: string;
        };
        PlatformNaclArch: {
          ARM: string;
          MIPS: string;
          PNACL: string;
          X86_32: string;
          X86_64: string;
        };
        PlatformOs: {
          ANDROID: string;
          CROS: string;
          LINUX: string;
          MAC: string;
          OPENBSD: string;
          WIN: string;
        };
        RequestUpdateCheckStatus: {
          NO_UPDATE: string;
          THROTTLED: string;
          UPDATE_AVAILABLE: string;
        };
        // Returns a port-like object: message posting, listener registration, disconnect.
        connect: () => {
          postMessage: () => void;
          onMessage: {
            addListener: () => void;
            removeListener: () => void;
          };
          disconnect: () => void;
        };
      };
    };
  }
}
 74 | 
 75 | // ─── BROWSER CONFIG TYPES ─────────────────────────────────────────────
/**
 * Browser settings: user agent string, timeouts, retry limit and recovery wait.
 * NOTE(review): the numeric timeouts are presumably milliseconds — confirm against the config module.
 */
export interface BrowserConfig {
  USER_AGENT: string;
  PAGE_TIMEOUT: number;
  SELECTOR_TIMEOUT: number;
  MAX_RETRIES: number;
  RECOVERY_WAIT_TIME: number;
  // Only the `navigation` profile is required by this type.
  TIMEOUT_PROFILES: {
    navigation: number;
  };
}
 86 | 
/**
 * Snapshot of browser/page availability flags plus an operation counter.
 * NOTE(review): presumably the input for choosing a recovery strategy — confirm against callers.
 */
export interface RecoveryContext {
  hasValidPage: boolean;
  hasBrowser: boolean;
  isBrowserConnected: boolean;
  operationCount: number;
}
 93 | 
/**
 * Classification of a failure: one boolean per recognised error category,
 * plus running counts of consecutive timeout and navigation errors.
 */
export interface ErrorAnalysis {
  isTimeout: boolean;
  isNavigation: boolean;
  isConnection: boolean;
  isDetachedFrame: boolean;
  isCaptcha: boolean;
  consecutiveTimeouts: number;
  consecutiveNavigationErrors: number;
}
103 | 
104 | // ─── PUPPETEER CONTEXT TYPE ───────────────────────────────────────────
/**
 * Context object handed to code that drives Puppeteer: the current browser/page
 * handles (nullable), bookkeeping fields, and the callbacks used to update them.
 */
export interface PuppeteerContext {
  browser: Browser | null;
  page: Page | null;
  isInitializing: boolean;
  searchInputSelector: string;
  lastSearchTime: number;
  idleTimeout: NodeJS.Timeout | null;
  operationCount: number;
  // Leveled logging callback.
  log: (level: "info" | "error" | "warn", message: string) => void;
  // Setter callbacks for the fields above.
  setBrowser: (browser: Browser | null) => void;
  setPage: (page: Page | null) => void;
  setIsInitializing: (val: boolean) => void;
  setSearchInputSelector: (selector: string) => void;
  setIdleTimeout: (timeout: NodeJS.Timeout | null) => void;
  // Returns a number — presumably the updated count; TODO confirm.
  incrementOperationCount: () => number;
  // Maps an optional error to a numeric recovery level; the scale is defined by the implementer.
  determineRecoveryLevel: (error?: Error) => number;
  // Idle timeout duration in milliseconds, as the `_MS` suffix indicates.
  IDLE_TIMEOUT_MS: number;
}
123 | 
124 | // ─── BROWSER MANAGER INTERFACE ────────────────────────────────────────
/**
 * Contract for the component that owns the Puppeteer browser: lifecycle
 * (`initialize` / `cleanup`), navigation, captcha detection, recovery, and
 * accessors for the current page, browser and `PuppeteerContext`.
 */
export interface IBrowserManager {
  initialize(): Promise<void>;
  navigateToPerplexity(): Promise<void>;
  // Resolves to a string, or null — presumably the matched selector when found; TODO confirm.
  waitForSearchInput(): Promise<string | null>;
  checkForCaptcha(): Promise<boolean>;
  performRecovery(error?: Error): Promise<void>;
  isReady(): boolean;
  cleanup(): Promise<void>;
  getPage(): Page | null;
  getBrowser(): Browser | null;
  resetIdleTimeout(): void;
  getPuppeteerContext(): PuppeteerContext;
}
138 | 
139 | // ─── CONTENT EXTRACTION TYPES ─────────────────────────────────────────
/**
 * Outcome of extracting one page. Only `url` is mandatory; `title`,
 * `textContent` and `error` may each be absent or null.
 */
export interface PageContentResult {
  url: string;
  title?: string | null;
  textContent?: string | null;
  error?: string | null;
}
146 | 
/**
 * Aggregate report of a multi-page extraction: overall status, optional
 * human-readable message, the starting URL, the depth used, the number of
 * pages explored and the per-page results.
 */
export interface RecursiveFetchResult {
  status: "Success" | "SuccessWithPartial" | "Error";
  message?: string;
  rootUrl: string;
  explorationDepth: number;
  pagesExplored: number;
  content: PageContentResult[];
}
155 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Perplexity MCP Zerver
  2 | 
  3 | A minimalist research server implementing the Model Context Protocol (MCP) to deliver AI-powered research capabilities through Perplexity's web interface.
  4 | 
  5 | [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-333)]()
  6 | [![TypeScript Codebase](https://img.shields.io/badge/TypeScript-Codebase-333)]()
  7 | [![Tests Passing](https://img.shields.io/badge/Tests-Passing-333)]()
  8 | [![Bun Runtime](https://img.shields.io/badge/Runtime-Bun-333)]()
  9 | 
 10 | ## Research Capabilities
 11 | 
 12 | - **Intelligent Web Research**: Search and summarize content without API limits
 13 | - **Persistent Conversations**: Maintain context with local SQLite chat storage
 14 | - **Content Extraction**: Clean article extraction with GitHub repository parsing
 15 | - **Developer Tooling**: Documentation retrieval, API discovery, code analysis
 16 | - **Keyless Operation**: Browser automation replaces API key requirements
 17 | 
 18 | ---
 19 | 
 20 | ## Available Tools
 21 | 
 22 | ### Search (`search`)
 23 | Perform research queries with configurable depth  
 24 | *Returns raw text results*
 25 | 
 26 | ### Get Documentation (`get_documentation`)
 27 | Retrieve technical documentation with examples  
 28 | *Returns structured documentation*
 29 | 
 30 | ### Find APIs (`find_apis`)
 31 | Discover relevant APIs for development needs  
 32 | *Returns API listings and descriptions*
 33 | 
 34 | ### Check Deprecated Code (`check_deprecated_code`)
 35 | Analyze code snippets for outdated patterns  
 36 | *Returns analysis report*
 37 | 
 38 | ### Extract URL Content (`extract_url_content`)
 39 | Parse web content with automatic GitHub handling  
 40 | *Returns structured content metadata*
 41 | 
 42 | ### Chat (`chat_perplexity`)
 43 | Persistent conversations with context history  
 44 | *Returns conversation state in JSON format*
 45 | 
 46 | ---
 47 | 
 48 | ## Getting Started
 49 | 
 50 | ### Prerequisites
 51 | - Bun runtime
 52 | - Node.js 18+ (for TypeScript compilation)
 53 | 
 54 | ### Installation
 55 | ```bash
 56 | git clone https://github.com/wysh3/perplexity-mcp-zerver.git
 57 | cd perplexity-mcp-zerver
 58 | bun install
 59 | bun run build
 60 | ```
 61 | 
 62 | ### Configuration
 63 | Add to your MCP configuration file:
 64 | ```json
 65 | {
 66 |   "mcpServers": {
 67 |     "perplexity-server": {
 68 |       "command": "bun",
 69 |       "args": ["/absolute/path/to/build/main.js"],
 70 |       "timeout": 300
 71 |     }
 72 |   }
 73 | }
 74 | ```
 75 | 
 76 | ### Usage
 77 | Initiate commands through your MCP client:
 78 | - "Use perplexity to research quantum computing advancements"
 79 | - "Ask perplexity-server for React 18 documentation"
 80 | - "Begin conversation with perplexity about neural networks"
 81 | 
 82 | ---
 83 | 
 84 | ## 🔐 Pro Account Support (Optional)
 85 | 
 86 | Use your Perplexity Pro subscription for access to better models (GPT-5.1, Claude Sonnet 4.5) and higher limits.
 87 | 
 88 | ### One-Time Setup
 89 | ```bash
 90 | bun run build
 91 | bun run login
 92 | ```
 93 | 
 94 | A browser window will open. **Log in using email** (recommended for best compatibility), then close the browser. Your session is now saved!
 95 | 
 96 | > **Note**: Google/SSO login may work but email login is more reliable with the browser automation.
 97 | 
 98 | ### Environment Variables
 99 | 
100 | | Variable | Default | Description |
101 | |----------|---------|-------------|
102 | | `PERPLEXITY_BROWSER_DATA_DIR` | `~/.perplexity-mcp` | Browser profile directory |
103 | | `PERPLEXITY_PERSISTENT_PROFILE` | `true` | Set to `false` for anonymous mode |
104 | 
105 | ---
106 | 
107 | ## Technical Comparison
108 | 
109 | | Feature              | This Implementation | Traditional APIs |
110 | |----------------------|---------------------|------------------|
111 | | Authentication       | None required       | API keys         |
112 | | Cost                 | Free                | Usage-based      |
113 | | Data Privacy         | Local processing    | Remote servers   |
114 | | GitHub Integration   | Native support      | Limited          |
115 | | History Persistence  | SQLite storage      | Session-based    |
116 | 
117 | ---
118 | 
119 | ## Troubleshooting
120 | 
121 | **Server Connection Issues**
122 | 1. Verify absolute path in configuration
123 | 2. Confirm the Bun runtime is installed with `bun -v` (and Node.js with `node -v`)
124 | 3. Ensure build completed successfully
125 | 
126 | **Content Extraction**
127 | - GitHub paths must use full repository URLs
128 | - Adjust link recursion depth in source configuration
129 | 
130 | ---
131 | 
132 | ## Origins & License
133 |  
134 | Based on [DaInfernalCoder/perplexity-researcher-mcp](https://github.com/DaInfernalCoder/perplexity-researcher-mcp)  
135 | Refactored from [sm-moshi/docshunter](https://github.com/sm-moshi/docshunter)  
136 | 
137 | Licensed under **GNU GPL v3.0** - [View License](LICENSE)
138 | 
139 | ---
140 | 
141 | > This project interfaces with Perplexity via browser automation. Use responsibly and ethically. Stability depends on Perplexity's website consistency. Educational use only.
142 | 


--------------------------------------------------------------------------------
/src/tools/extractUrlContent.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Tool handler for 'extract_url_content'.
  3 |  * Extracts main article text content from a given URL, optionally recursively exploring links up to a specified depth.
  4 |  * @param args - { url: string; depth?: number }
  5 |  * @param ctx - PuppeteerContext for browser operations
  6 |  * @returns The extraction result as a JSON string
  7 |  */
  8 | import type { PageContentResult, PuppeteerContext } from "../types/index.js";
  9 | import { fetchSinglePageContent, recursiveFetch } from "../utils/extraction.js";
 10 | 
 11 | // Helper functions for content extraction
 12 | function createTimeoutSetup(
 13 |   globalTimeoutDuration: number,
 14 |   globalTimeoutSignal: { timedOut: boolean },
 15 | ) {
 16 |   let globalTimeoutHandle: NodeJS.Timeout | null = null;
 17 | 
 18 |   const timeoutPromise = new Promise<never>((_, reject) => {
 19 |     globalTimeoutHandle = setTimeout(() => {
 20 |       globalTimeoutSignal.timedOut = true;
 21 |       reject(new Error(`Recursive fetch timed out after ${globalTimeoutDuration}ms`));
 22 |     }, globalTimeoutDuration);
 23 |   });
 24 | 
 25 |   return { timeoutPromise, globalTimeoutHandle };
 26 | }
 27 | 
 28 | function determineStatus(results: PageContentResult[]): "Success" | "SuccessWithPartial" | "Error" {
 29 |   const successfulPages = results.filter((r) => !r.error && r.textContent);
 30 | 
 31 |   if (successfulPages.length === results.length) {
 32 |     return "Success";
 33 |   }
 34 |   if (successfulPages.length > 0) {
 35 |     return "SuccessWithPartial";
 36 |   }
 37 |   return "Error";
 38 | }
 39 | 
 40 | function generateStatusMessage(status: string, results: PageContentResult[]): string | undefined {
 41 |   if (status === "SuccessWithPartial") {
 42 |     const successfulPages = results.filter((r) => !r.error && r.textContent);
 43 |     return `Fetched ${successfulPages.length}/${results.length} pages successfully. Some pages failed or timed out.`;
 44 |   }
 45 | 
 46 |   if (status === "Error" && results.length > 0) {
 47 |     return "Failed to fetch all content. Initial page fetch might have failed or timed out.";
 48 |   }
 49 |   if (status === "Error") {
 50 |     return "Failed to fetch any content. Initial page fetch might have failed or timed out.";
 51 |   }
 52 | 
 53 |   return undefined;
 54 | }
 55 | 
 56 | function formatSuccessResult(
 57 |   status: string,
 58 |   message: string | undefined,
 59 |   url: string,
 60 |   validatedDepth: number,
 61 |   results: PageContentResult[],
 62 | ) {
 63 |   return {
 64 |     status,
 65 |     message,
 66 |     rootUrl: url,
 67 |     explorationDepth: validatedDepth,
 68 |     pagesExplored: results.length,
 69 |     content: results,
 70 |   };
 71 | }
 72 | 
 73 | function formatErrorResult(
 74 |   errorMessage: string,
 75 |   url: string,
 76 |   validatedDepth: number,
 77 |   results: PageContentResult[],
 78 | ) {
 79 |   if (results.length > 0) {
 80 |     return {
 81 |       status: "SuccessWithPartial",
 82 |       message: `Operation failed: ${errorMessage}. Returning partial results collected before failure.`,
 83 |       rootUrl: url,
 84 |       explorationDepth: validatedDepth,
 85 |       pagesExplored: results.length,
 86 |       content: results,
 87 |     };
 88 |   }
 89 | 
 90 |   return {
 91 |     status: "Error",
 92 |     message: `Recursive fetch failed: ${errorMessage}`,
 93 |     rootUrl: url,
 94 |     explorationDepth: validatedDepth,
 95 |     pagesExplored: 0,
 96 |     content: [],
 97 |   };
 98 | }
 99 | 
/**
 * Tool handler for 'extract_url_content'.
 *
 * With an effective depth of 1 the single page at `url` is fetched and its result
 * serialised directly. With a larger depth, links are explored recursively under a
 * global deadline of `ctx.IDLE_TIMEOUT_MS - 5000` milliseconds, and a report object
 * (status, message, root URL, depth, page count, per-page content) is serialised.
 *
 * @param args - `url` to extract; optional `depth`, truncated to a whole number and
 *   clamped to the range 1..5 (a non-numeric depth falls back to 1)
 * @param ctx - PuppeteerContext for browser operations
 * @returns The extraction result as a pretty-printed JSON string. Failures of the
 *   recursive path are reported inside the JSON rather than thrown; failures of the
 *   single-page path propagate from `fetchSinglePageContent`.
 */
export default async function extractUrlContent(
  args: { url: string; depth?: number },
  ctx: PuppeteerContext,
): Promise<string> {
  const { url, depth = 1 } = args;

  // Truncate first so 1.5 takes the single-page path instead of recursing with a
  // fractional depth; NaN would otherwise survive Math.min/Math.max unchanged.
  const wholeDepth = Math.trunc(depth);
  const validatedDepth = Number.isNaN(wholeDepth) ? 1 : Math.max(1, Math.min(wholeDepth, 5));

  if (validatedDepth === 1) {
    // For single page extraction, return the result directly as a string
    const result = await fetchSinglePageContent(url, ctx);
    return JSON.stringify(result, null, 2);
  }

  // Recursive fetch logic
  const visitedUrls = new Set<string>();
  const results: PageContentResult[] = [];
  // Stop a little before the idle timeout so there is time left to report.
  const globalTimeoutDuration = ctx.IDLE_TIMEOUT_MS - 5000;
  const globalTimeoutSignal = { timedOut: false };

  const { timeoutPromise, globalTimeoutHandle } = createTimeoutSetup(
    globalTimeoutDuration,
    globalTimeoutSignal,
  );

  try {
    const fetchPromise = recursiveFetch(
      url,
      validatedDepth,
      1,
      visitedUrls,
      results,
      globalTimeoutSignal,
      ctx,
    );

    // Whichever settles first wins; a timeout surfaces as a rejection handled below.
    await Promise.race([fetchPromise, timeoutPromise]);

    const status = determineStatus(results);
    const message = generateStatusMessage(status, results);
    const output = formatSuccessResult(status, message, url, validatedDepth, results);

    return JSON.stringify(output, null, 2);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const output = formatErrorResult(errorMessage, url, validatedDepth, results);

    return JSON.stringify(output, null, 2);
  } finally {
    // Single cleanup point for both the success and the failure path.
    if (globalTimeoutHandle) clearTimeout(globalTimeoutHandle);
  }
}
151 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/types.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it } from "vitest";
  2 | import type {
  3 |   BrowserConfig,
  4 |   RecoveryContext,
  5 |   ErrorAnalysis,
  6 |   PuppeteerContext,
  7 |   IBrowserManager,
  8 |   ChatMessage,
  9 |   ChatResult,
 10 |   IDatabaseManager,
 11 |   ISearchEngine,
 12 |   ToolHandler,
 13 |   ToolHandlersRegistry,
 14 |   ChatPerplexityArgs,
 15 |   ExtractUrlContentArgs,
 16 |   SearchArgs,
 17 |   ServerDependencies,
 18 | } from "../../types/index.js";
 19 | 
 20 | describe("Type Definitions", () => {
 21 |   describe("Browser Types", () => {
 22 |     it("should define BrowserConfig structure", () => {
 23 |       const config: BrowserConfig = {
 24 |         USER_AGENT: "test-agent",
 25 |         PAGE_TIMEOUT: 30000,
 26 |         SELECTOR_TIMEOUT: 5000,
 27 |         MAX_RETRIES: 3,
 28 |         RECOVERY_WAIT_TIME: 5000,
 29 |         TIMEOUT_PROFILES: {
 30 |           navigation: 30000,
 31 |         },
 32 |       };
 33 | 
 34 |       expect(config.USER_AGENT).toBeTypeOf("string");
 35 |       expect(config.PAGE_TIMEOUT).toBeTypeOf("number");
 36 |     });
 37 | 
 38 |     it("should define RecoveryContext structure", () => {
 39 |       const context: RecoveryContext = {
 40 |         hasValidPage: true,
 41 |         hasBrowser: true,
 42 |         isBrowserConnected: true,
 43 |         operationCount: 5,
 44 |       };
 45 | 
 46 |       expect(context.hasValidPage).toBeTypeOf("boolean");
 47 |       expect(context.operationCount).toBeTypeOf("number");
 48 |     });
 49 | 
 50 |     it("should define ErrorAnalysis structure", () => {
 51 |       const errorAnalysis: ErrorAnalysis = {
 52 |         isTimeout: true,
 53 |         isNavigation: false,
 54 |         isConnection: false,
 55 |         isDetachedFrame: false,
 56 |         isCaptcha: false,
 57 |         consecutiveTimeouts: 0,
 58 |         consecutiveNavigationErrors: 0,
 59 |       };
 60 | 
 61 |       expect(errorAnalysis.isTimeout).toBeTypeOf("boolean");
 62 |       expect(errorAnalysis.consecutiveTimeouts).toBeTypeOf("number");
 63 |     });
 64 | 
 65 |     it("should define IBrowserManager interface", () => {
 66 |       // This is an interface, so we just verify it compiles
 67 |       const manager: IBrowserManager = {
 68 |         initialize: async () => {},
 69 |         navigateToPerplexity: async () => {},
 70 |         waitForSearchInput: async () => null,
 71 |         checkForCaptcha: async () => false,
 72 |         performRecovery: async () => {},
 73 |         isReady: () => true,
 74 |         cleanup: async () => {},
 75 |         getPage: () => null,
 76 |         getBrowser: () => null,
 77 |         resetIdleTimeout: () => {},
 78 |         getPuppeteerContext: () => ({}) as PuppeteerContext,
 79 |       };
 80 | 
 81 |       expect(manager).toBeDefined();
 82 |     });
 83 |   });
 84 | 
 85 |   describe("Database Types", () => {
 86 |     it("should define ChatMessage structure", () => {
 87 |       const message: ChatMessage = {
 88 |         role: "user",
 89 |         content: "Hello",
 90 |       };
 91 | 
 92 |       expect(message.role).toMatch(/^(user|assistant)$/);
 93 |       expect(message.content).toBeTypeOf("string");
 94 |     });
 95 | 
 96 |     it("should define ChatResult structure", () => {
 97 |       const result: ChatResult = {
 98 |         chat_id: "test-chat-id",
 99 |         response: "Test response",
100 |       };
101 | 
102 |       expect(result.chat_id).toBeTypeOf("string");
103 |       expect(result.response).toBeTypeOf("string");
104 |     });
105 | 
106 |     it("should define IDatabaseManager interface", () => {
107 |       const dbManager: IDatabaseManager = {
108 |         initialize: () => {},
109 |         getChatHistory: () => [],
110 |         saveChatMessage: () => {},
111 |         close: () => {},
112 |         isInitialized: () => true,
113 |       };
114 | 
115 |       expect(dbManager).toBeDefined();
116 |     });
117 |   });
118 | 
119 |   describe("Tool Types", () => {
120 |     it("should define ISearchEngine interface", () => {
121 |       const searchEngine: ISearchEngine = {
122 |         performSearch: async () => "result",
123 |       };
124 | 
125 |       expect(searchEngine.performSearch).toBeTypeOf("function");
126 |     });
127 | 
128 |     it("should define ToolHandler type", () => {
129 |       const handler: ToolHandler = async () => "result";
130 |       expect(handler).toBeTypeOf("function");
131 |     });
132 | 
133 |     it("should define ToolHandlersRegistry structure", () => {
134 |       const registry: ToolHandlersRegistry = {
135 |         chat_perplexity: async () => "result",
136 |         search: async () => "result",
137 |       };
138 | 
139 |       expect(registry).toBeDefined();
140 |     });
141 | 
142 |     it("should define argument types", () => {
143 |       const chatArgs: ChatPerplexityArgs = {
144 |         message: "test message",
145 |       };
146 | 
147 |       const extractArgs: ExtractUrlContentArgs = {
148 |         url: "https://example.com",
149 |       };
150 | 
151 |       const searchArgs: SearchArgs = {
152 |         query: "test query",
153 |         detail_level: "normal",
154 |       };
155 | 
156 |       expect(chatArgs.message).toBeTypeOf("string");
157 |       expect(extractArgs.url).toBeTypeOf("string");
158 |       expect(searchArgs.query).toBeTypeOf("string");
159 |     });
160 |   });
161 | 
162 |   describe("Server Types", () => {
163 |     it("should define ServerDependencies structure", () => {
164 |       const dependencies: ServerDependencies = {
165 |         browserManager: undefined,
166 |         searchEngine: undefined,
167 |         databaseManager: undefined,
168 |       };
169 | 
170 |       expect(dependencies).toBeDefined();
171 |     });
172 |   });
173 | });
174 | 


--------------------------------------------------------------------------------
/src/server/modules/BrowserManager.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * BrowserManager - Handles all Puppeteer browser operations
  3 |  * Focused, testable module for browser automation
  4 |  */
  5 | import type { Browser, Page } from "puppeteer";
  6 | import type { IBrowserManager, PuppeteerContext } from "../../types/index.js";
  7 | import { logError, logInfo, logWarn } from "../../utils/logging.js";
  8 | import {
  9 |   checkForCaptcha,
 10 |   initializeBrowser,
 11 |   navigateToPerplexity,
 12 |   recoveryProcedure,
 13 |   resetIdleTimeout,
 14 |   waitForSearchInput,
 15 | } from "../../utils/puppeteer.js";
 16 | 
 17 | export class BrowserManager implements IBrowserManager {
 18 |   public browser: Browser | null = null;
 19 |   public page: Page | null = null;
 20 |   public isInitializing = false;
 21 |   public searchInputSelector = 'textarea[placeholder*="Ask"]';
 22 |   public readonly lastSearchTime = 0;
 23 |   public idleTimeout: NodeJS.Timeout | null = null;
 24 |   public operationCount = 0;
 25 |   public readonly IDLE_TIMEOUT_MS = 5 * 60 * 1000;
 26 | 
 27 |   public getPuppeteerContext(): PuppeteerContext {
 28 |     return {
 29 |       browser: this.browser,
 30 |       page: this.page,
 31 |       isInitializing: this.isInitializing,
 32 |       searchInputSelector: this.searchInputSelector,
 33 |       lastSearchTime: this.lastSearchTime,
 34 |       idleTimeout: this.idleTimeout,
 35 |       operationCount: this.operationCount,
 36 |       log: this.log.bind(this),
 37 |       setBrowser: (browser) => {
 38 |         this.browser = browser;
 39 |       },
 40 |       setPage: (page) => {
 41 |         this.page = page;
 42 |       },
 43 |       setIsInitializing: (val) => {
 44 |         this.isInitializing = val;
 45 |       },
 46 |       setSearchInputSelector: (selector) => {
 47 |         this.searchInputSelector = selector;
 48 |       },
 49 |       setIdleTimeout: (timeout) => {
 50 |         this.idleTimeout = timeout;
 51 |       },
 52 |       incrementOperationCount: () => ++this.operationCount,
 53 |       determineRecoveryLevel: this.determineRecoveryLevel.bind(this),
 54 |       IDLE_TIMEOUT_MS: this.IDLE_TIMEOUT_MS,
 55 |     };
 56 |   }
 57 | 
 58 |   private log(level: "info" | "error" | "warn", message: string) {
 59 |     switch (level) {
 60 |       case "info":
 61 |         logInfo(message);
 62 |         break;
 63 |       case "warn":
 64 |         logWarn(message);
 65 |         break;
 66 |       case "error":
 67 |         logError(message);
 68 |         break;
 69 |       default:
 70 |         logInfo(message);
 71 |     }
 72 |   }
 73 | 
 74 |   private determineRecoveryLevel(error?: Error): number {
 75 |     if (!error) return 1;
 76 | 
 77 |     const errorMessage = error.message.toLowerCase();
 78 | 
 79 |     // Level 3: Critical errors requiring full browser restart
 80 |     if (
 81 |       errorMessage.includes("detached") ||
 82 |       errorMessage.includes("crashed") ||
 83 |       errorMessage.includes("disconnected") ||
 84 |       errorMessage.includes("protocol error")
 85 |     ) {
 86 |       return 3;
 87 |     }
 88 | 
 89 |     // Level 2: Navigation/page errors requiring page restart
 90 |     if (
 91 |       errorMessage.includes("navigation") ||
 92 |       errorMessage.includes("timeout") ||
 93 |       errorMessage.includes("net::err")
 94 |     ) {
 95 |       return 2;
 96 |     }
 97 | 
 98 |     // Level 1: Minor errors requiring simple recovery
 99 |     return 1;
100 |   }
101 | 
102 |   async initialize(): Promise<void> {
103 |     if (this.isInitializing) {
104 |       logInfo("Browser initialization already in progress...");
105 |       return;
106 |     }
107 | 
108 |     try {
109 |       const ctx = this.getPuppeteerContext();
110 |       await initializeBrowser(ctx);
111 |       logInfo("BrowserManager initialized successfully");
112 |     } catch (error) {
113 |       logError("BrowserManager initialization failed:", {
114 |         error: error instanceof Error ? error.message : String(error),
115 |       });
116 |       throw error;
117 |     }
118 |   }
119 | 
120 |   async navigateToPerplexity(): Promise<void> {
121 |     const ctx = this.getPuppeteerContext();
122 |     await navigateToPerplexity(ctx);
123 |   }
124 | 
125 |   async waitForSearchInput(): Promise<string | null> {
126 |     const ctx = this.getPuppeteerContext();
127 |     const selector = await waitForSearchInput(ctx);
128 |     return selector;
129 |   }
130 | 
131 |   async checkForCaptcha(): Promise<boolean> {
132 |     const ctx = this.getPuppeteerContext();
133 |     return await checkForCaptcha(ctx);
134 |   }
135 | 
136 |   async performRecovery(error?: Error): Promise<void> {
137 |     const ctx = this.getPuppeteerContext();
138 |     await recoveryProcedure(ctx, error);
139 |   }
140 | 
141 |   isReady(): boolean {
142 |     return !!(this.browser && this.page && !this.page.isClosed() && !this.isInitializing);
143 |   }
144 | 
145 |   async cleanup(): Promise<void> {
146 |     try {
147 |       if (this.idleTimeout) {
148 |         clearTimeout(this.idleTimeout);
149 |         this.idleTimeout = null;
150 |       }
151 | 
152 |       if (this.page && !this.page.isClosed()) {
153 |         await this.page.close();
154 |       }
155 | 
156 |       if (this.browser?.isConnected()) {
157 |         await this.browser.close();
158 |       }
159 | 
160 |       this.page = null;
161 |       this.browser = null;
162 |       this.isInitializing = false;
163 | 
164 |       logInfo("BrowserManager cleanup completed");
165 |     } catch (error) {
166 |       logError("Error during BrowserManager cleanup:", {
167 |         error: error instanceof Error ? error.message : String(error),
168 |       });
169 |     }
170 |   }
171 | 
172 |   getPage(): Page | null {
173 |     return this.page;
174 |   }
175 | 
176 |   getBrowser(): Browser | null {
177 |     return this.browser;
178 |   }
179 | 
180 |   resetIdleTimeout(): void {
181 |     const ctx = this.getPuppeteerContext();
182 |     resetIdleTimeout(ctx);
183 |   }
184 | }
185 | 


--------------------------------------------------------------------------------
/src/login.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | /**
  3 |  * Perplexity Pro Account Login Script
  4 |  *
  5 |  * This script opens a browser window for users to manually log into their
  6 |  * Perplexity Pro account. The session is saved to a persistent profile
  7 |  * that will be used by the MCP server.
  8 |  *
  9 |  * Usage: bun run login
 10 |  */
 11 | 
 12 | import { existsSync, mkdirSync } from "node:fs";
 13 | import puppeteer from "puppeteer";
 14 | import { CONFIG } from "./server/config.js";
 15 | import { generateBrowserArgs } from "./utils/puppeteer-logic.js";
 16 | 
/** Page the login browser navigates to after launch. */
const PERPLEXITY_URL = "https://www.perplexity.ai";
 18 | 
 19 | // Same evasion setup as the main server (from puppeteer.ts)
 20 | async function setupBrowserEvasion(page: import("puppeteer").Page) {
 21 |     await page.evaluateOnNewDocument(() => {
 22 |         Object.defineProperties(navigator, {
 23 |             webdriver: { get: () => undefined },
 24 |             hardwareConcurrency: { get: () => 8 },
 25 |             deviceMemory: { get: () => 8 },
 26 |             platform: { get: () => "Win32" },
 27 |             languages: { get: () => ["en-US", "en"] },
 28 |             permissions: {
 29 |                 get: () => ({
 30 |                     query: async () => ({ state: "prompt" }),
 31 |                 }),
 32 |             },
 33 |         });
 34 |         if (typeof window.chrome === "undefined") {
 35 |             (window as any).chrome = {
 36 |                 app: {
 37 |                     InstallState: { DISABLED: "disabled", INSTALLED: "installed", NOT_INSTALLED: "not_installed" },
 38 |                     RunningState: { CANNOT_RUN: "cannot_run", READY_TO_RUN: "ready_to_run", RUNNING: "running" },
 39 |                     getDetails: () => { },
 40 |                     getIsInstalled: () => { },
 41 |                     installState: () => { },
 42 |                     isInstalled: false,
 43 |                     runningState: () => { },
 44 |                 },
 45 |                 runtime: {
 46 |                     OnInstalledReason: { CHROME_UPDATE: "chrome_update", INSTALL: "install", SHARED_MODULE_UPDATE: "shared_module_update", UPDATE: "update" },
 47 |                     PlatformArch: { ARM: "arm", ARM64: "arm64", MIPS: "mips", MIPS64: "mips64", X86_32: "x86-32", X86_64: "x86-64" },
 48 |                     PlatformNaclArch: { ARM: "arm", MIPS: "mips", PNACL: "pnacl", X86_32: "x86-32", X86_64: "x86-64" },
 49 |                     PlatformOs: { ANDROID: "android", CROS: "cros", LINUX: "linux", MAC: "mac", OPENBSD: "openbsd", WIN: "win" },
 50 |                     RequestUpdateCheckStatus: { NO_UPDATE: "no_update", THROTTLED: "throttled", UPDATE_AVAILABLE: "update_available" },
 51 |                     connect: () => ({ postMessage: () => { }, onMessage: { addListener: () => { }, removeListener: () => { } }, disconnect: () => { } }),
 52 |                 },
 53 |             };
 54 |         }
 55 |     });
 56 | }
 57 | 
 58 | async function main() {
 59 |     console.log("🔐 Perplexity Pro Account Login\n");
 60 | 
 61 |     // Ensure profile directory exists
 62 |     const profileDir = CONFIG.BROWSER_DATA_DIR;
 63 |     if (!existsSync(profileDir)) {
 64 |         mkdirSync(profileDir, { recursive: true });
 65 |         console.log(`📁 Created profile directory: ${profileDir}`);
 66 |     }
 67 | 
 68 |     console.log(`📂 Using profile directory: ${profileDir}\n`);
 69 |     console.log("🌐 Opening browser...\n");
 70 | 
 71 |     // Use minimal args for interactive login - avoid aggressive flags that break UI
 72 |     const browser = await puppeteer.launch({
 73 |         headless: false,
 74 |         args: [
 75 |             "--no-sandbox",
 76 |             "--disable-setuid-sandbox",
 77 |             "--disable-infobars",
 78 |             "--window-size=1280,720",
 79 |             "--disable-blink-features=AutomationControlled", // Critical for Cloudflare
 80 |             "--disable-web-security", // Critical for CORS (Perplexity assets)
 81 |             "--disable-features=IsolateOrigins,site-per-process", // Critical for CORS
 82 |         ],
 83 |         userDataDir: profileDir,
 84 |         ignoreDefaultArgs: ["--enable-automation"], // Hide "Chrome is being controlled by automated test software"
 85 |     });
 86 | 
 87 |     // Use the existing page (don't create a new one - that leaves about:blank open)
 88 |     const pages = await browser.pages();
 89 |     const page = pages[0] || (await browser.newPage());
 90 | 
 91 |     // Apply same evasion as main server
 92 |     await setupBrowserEvasion(page);
 93 | 
 94 |     // Same viewport and user agent as main server
 95 |     await page.setViewport({
 96 |         width: 1280,
 97 |         height: 720,
 98 |         deviceScaleFactor: 1,
 99 |         isMobile: false,
100 |         hasTouch: false,
101 |     });
102 |     await page.setUserAgent(CONFIG.USER_AGENT);
103 |     page.setDefaultNavigationTimeout(CONFIG.PAGE_TIMEOUT);
104 | 
105 |     console.log("📍 Navigating to Perplexity...");
106 |     console.log(`   URL: ${PERPLEXITY_URL}\n`);
107 | 
108 |     try {
109 |         await page.goto(PERPLEXITY_URL, {
110 |             waitUntil: "domcontentloaded",
111 |             timeout: CONFIG.PAGE_TIMEOUT,
112 |         });
113 |         console.log("✅ Navigation successful!\n");
114 |     } catch (err) {
115 |         console.log(`⚠️  Navigation issue: ${err instanceof Error ? err.message : err}`);
116 |         console.log("   The browser is ready - you can navigate manually if needed.\n");
117 |     }
118 |     console.log("═══════════════════════════════════════════════════════════════");
119 |     console.log("║                                                             ║");
120 |     console.log("║   👋 INSTRUCTIONS:                                          ║");
121 |     console.log("║                                                             ║");
122 |     console.log("║   1. Complete any Cloudflare verification if shown          ║");
123 |     console.log("║   2. Log into your Perplexity Pro account                   ║");
124 |     console.log("║   3. Once logged in, close the browser window               ║");
125 |     console.log("║   4. Your session will be saved automatically               ║");
126 |     console.log("║                                                             ║");
127 |     console.log("═══════════════════════════════════════════════════════════════\n");
128 | 
129 |     // Wait for browser to close
130 |     await new Promise<void>((resolve) => {
131 |         browser.on("disconnected", () => resolve());
132 |     });
133 | 
134 |     console.log("\n✅ Login session saved successfully!");
135 |     console.log("🚀 You can now use the MCP server with your Pro account.\n");
136 | }
137 | 
// Script entry point: report any failure and exit with a non-zero status.
main().catch((error: unknown) => {
    // A rejection is not guaranteed to be an Error; fall back to its string form.
    const reason = error instanceof Error ? error.message : String(error);
    console.error("❌ Login failed:", reason);
    process.exit(1);
});
142 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/extraction.test.ts:
--------------------------------------------------------------------------------
  1 | import { beforeEach, describe, expect, it, vi } from "vitest";
  2 | import type { Page } from "puppeteer";
  3 | import type { PageContentResult, PuppeteerContext } from "../../types/browser.js";
  4 | 
// Mock types for testing - these are partial implementations that satisfy the test requirements.
// `Pick<..., never>` selects no members of the real type, so each alias is
// effectively just the mocked member declared after the `&`.
type MockPage = Pick<Page, never> & { evaluate: ReturnType<typeof vi.fn> };
type MockPuppeteerContext = Pick<PuppeteerContext, never> & { log: ReturnType<typeof vi.fn> };

// Mock external dependencies
// Readability: every instance's parse() returns the same fixed article.
vi.mock("@mozilla/readability", () => ({
  Readability: vi.fn().mockImplementation(() => ({
    parse: vi.fn().mockReturnValue({
      title: "Test Title",
      textContent: "Test content from Readability",
    }),
  })),
}));

// JSDOM: a minimal document stub with no matching elements and a fixed title.
vi.mock("jsdom", () => ({
  JSDOM: vi.fn().mockImplementation(() => ({
    window: {
      document: {
        querySelector: vi.fn(),
        querySelectorAll: vi.fn().mockReturnValue([]),
        title: "Test Page",
      },
    },
    serialize: vi.fn().mockReturnValue("<html></html>"),
  })),
}));

// axios: only `head` is stubbed on the default export.
vi.mock("axios", () => ({
  default: {
    head: vi.fn(),
  },
}));

// Mock internal dependencies
// Config: just the fields provided here (user agent and two timeout profiles).
vi.mock("../../server/config.js", () => ({
  CONFIG: {
    USER_AGENT: "test-agent",
    TIMEOUT_PROFILES: {
      navigation: 30000,
      content: 60000,
    },
  },
}));

// Logging helpers are replaced with spies.
vi.mock("../../utils/logging.js", () => ({
  logInfo: vi.fn(),
  logWarn: vi.fn(),
  logError: vi.fn(),
}));

// Browser initialization is replaced with a spy.
vi.mock("../../utils/puppeteer.js", () => ({
  initializeBrowser: vi.fn(),
}));

// Simple-fetch helper; individual tests set its resolved value when needed.
vi.mock("../../utils/fetch.js", () => ({
  fetchSimpleContent: vi.fn(),
}));
 62 | 
 63 | describe("Extraction Utilities", () => {
 64 |   beforeEach(() => {
 65 |     vi.clearAllMocks();
 66 |   });
 67 | 
 68 |   describe("Single Page Content Fetching", () => {
 69 |     it("should fetch and extract content from a single page", async () => {
 70 |       const { fetchSinglePageContent } = await import("../../utils/extraction.js");
 71 | 
 72 |       // We'll test this by focusing on the structure and not deep mocking
 73 |       expect(fetchSinglePageContent).toBeDefined();
 74 |       expect(typeof fetchSinglePageContent).toBe("function");
 75 |     });
 76 |   });
 77 | 
 78 |   describe("Link Extraction", () => {
 79 |     it("should extract same-domain links from a page", async () => {
 80 |       const { extractSameDomainLinks } = await import("../../utils/extraction.js");
 81 | 
 82 |       const mockPage: MockPage = {
 83 |         evaluate: vi.fn().mockResolvedValue([
 84 |           { url: "/page1", text: "Page 1" },
 85 |           { url: "/page2", text: "Page 2" },
 86 |           { url: "https://example.com/page3", text: "Page 3" },
 87 |         ]),
 88 |       };
 89 | 
 90 |       const result = await extractSameDomainLinks(mockPage as unknown as Page, "https://example.com");
 91 | 
 92 |       expect(result).toHaveLength(3);
 93 |       expect(result[0]?.url).toContain("https://example.com");
 94 |     });
 95 | 
 96 |     it("should filter out invalid and cross-domain links", async () => {
 97 |       const { extractSameDomainLinks } = await import("../../utils/extraction.js");
 98 | 
 99 |       const mockPage: MockPage = {
100 |         evaluate: vi.fn().mockResolvedValue([
101 |           { url: "javascript:void(0)", text: "Invalid Link" },
102 |           { url: "mailto:test@example.com", text: "Email Link" },
103 |           { url: "https://other.com/page", text: "Cross Domain" },
104 |           { url: "/valid-page", text: "Valid Page" },
105 |         ]),
106 |       };
107 | 
108 |       const result = await extractSameDomainLinks(mockPage as unknown as Page, "https://example.com");
109 | 
110 |       // Should only have the valid same-domain link
111 |       expect(result).toHaveLength(1);
112 |       expect(result[0]?.url).toBe("https://example.com/valid-page");
113 |     });
114 | 
115 |     it("should handle link extraction errors gracefully", async () => {
116 |       const { extractSameDomainLinks } = await import("../../utils/extraction.js");
117 | 
118 |       const mockPage: MockPage = {
119 |         evaluate: vi.fn().mockRejectedValue(new Error("Evaluation failed")),
120 |       };
121 | 
122 |       const result = await extractSameDomainLinks(mockPage as unknown as Page, "https://example.com");
123 | 
124 |       expect(result).toEqual([]);
125 |     });
126 |   });
127 | 
128 |   describe("Recursive Content Fetching", () => {
129 |     it("should recursively fetch content with depth limiting", async () => {
130 |       const { recursiveFetch } = await import("../../utils/extraction.js");
131 | 
132 |       // Test that the function exists and can be called
133 |       expect(recursiveFetch).toBeDefined();
134 |       expect(typeof recursiveFetch).toBe("function");
135 |     });
136 | 
137 |     it("should respect timeout signal during recursive fetch", async () => {
138 |       const { recursiveFetch } = await import("../../utils/extraction.js");
139 | 
140 |       const mockCtx: MockPuppeteerContext = { log: vi.fn() };
141 |       const visitedUrls = new Set<string>();
142 |       const results: PageContentResult[] = [];
143 |       const globalTimeoutSignal = { timedOut: true }; // Already timed out
144 | 
145 |       await recursiveFetch(
146 |         "https://example.com",
147 |         2,
148 |         1,
149 |         visitedUrls,
150 |         results,
151 |         globalTimeoutSignal,
152 |         mockCtx as unknown as PuppeteerContext,
153 |       );
154 | 
155 |       expect(results).toHaveLength(0);
156 |     });
157 | 
158 |     it("should handle basic recursive fetch flow", async () => {
159 |       const { recursiveFetch } = await import("../../utils/extraction.js");
160 | 
161 |       const mockCtx: MockPuppeteerContext = { log: vi.fn() };
162 |       const visitedUrls = new Set<string>();
163 |       const results: PageContentResult[] = [];
164 |       const globalTimeoutSignal = { timedOut: false };
165 | 
166 |       await recursiveFetch(
167 |         "https://example.com",
168 |         1,
169 |         1,
170 |         visitedUrls,
171 |         results,
172 |         globalTimeoutSignal,
173 |         mockCtx as unknown as PuppeteerContext,
174 |       );
175 | 
176 |       // Should have attempted to process the URL
177 |       expect(mockCtx.log).toHaveBeenCalledWith("info", "[Depth 1] Fetching: https://example.com");
178 |     });
179 | 
180 |     it("should fetch simpler content for deeper levels", async () => {
181 |       const { recursiveFetch } = await import("../../utils/extraction.js");
182 | 
183 |       const mockCtx: MockPuppeteerContext = { log: vi.fn() };
184 |       const visitedUrls = new Set<string>();
185 |       const results: PageContentResult[] = [];
186 |       const globalTimeoutSignal = { timedOut: false };
187 | 
188 |       // Mock fetchSimpleContent
189 |       const { fetchSimpleContent } = await import("../../utils/fetch.js");
190 |       vi.mocked(fetchSimpleContent).mockResolvedValue({
191 |         url: "https://example.com/page1",
192 |         title: "Page 1",
193 |         textContent: "Page 1 content",
194 |         error: null,
195 |       } as any);
196 | 
197 |       await recursiveFetch(
198 |         "https://example.com/page1",
199 |         2,
200 |         2, // currentDepth > 1, should use fetchSimpleContent
201 |         visitedUrls,
202 |         results,
203 |         globalTimeoutSignal,
204 |         mockCtx as unknown as PuppeteerContext,
205 |       );
206 | 
207 |       // Should have attempted to process the URL
208 |       expect(mockCtx.log).toHaveBeenCalledWith(
209 |         "info",
210 |         "[Depth 2] Fetching: https://example.com/page1",
211 |       );
212 |     });
213 |   });
214 | });
215 | 


--------------------------------------------------------------------------------
/src/utils/fetch.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Utility for simple HTTP content fetching and basic HTML/text extraction.
  3 |  * @param url - The URL to fetch
  4 |  * @param ctx - PuppeteerContext for logging and config
  5 |  * @returns { title, textContent, error }
  6 |  */
  7 | import { Readability } from "@mozilla/readability";
  8 | import axios from "axios";
  9 | import { JSDOM } from "jsdom";
 10 | import { CONFIG } from "../server/config.js";
 11 | import type { PuppeteerContext } from "../types/index.js";
 12 | 
 13 | // Helper functions for fetch content
 14 | async function performHttpRequest(url: string, ctx: PuppeteerContext) {
 15 |   ctx?.log?.("info", `Simple fetch starting for: ${url}`);
 16 | 
 17 |   const response = await axios.get(url, {
 18 |     timeout: 8000, // Reduced from 15000
 19 |     headers: {
 20 |       "User-Agent": CONFIG.USER_AGENT,
 21 |       Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
 22 |       "Accept-Language": "en-US,en;q=0.5",
 23 |       "Accept-Encoding": "gzip, deflate",
 24 |       Connection: "keep-alive",
 25 |       "Upgrade-Insecure-Requests": "1",
 26 |     },
 27 |     validateStatus: (status) => status >= 200 && status < 400, // Accept 2xx and 3xx
 28 |     maxRedirects: 3, // Limit redirects for faster response
 29 |   });
 30 | 
 31 |   return response;
 32 | }
 33 | 
 34 | function validateContentType(contentType: string, ctx: PuppeteerContext): string | null {
 35 |   if (
 36 |     !contentType.includes("html") &&
 37 |     !contentType.includes("text/plain") &&
 38 |     !contentType.includes("text/")
 39 |   ) {
 40 |     const errorMsg = `Unsupported content type: ${contentType}`;
 41 |     ctx?.log?.("warn", errorMsg);
 42 |     return errorMsg;
 43 |   }
 44 |   return null;
 45 | }
 46 | 
 47 | function validateResponseData(data: unknown, ctx: PuppeteerContext): string | null {
 48 |   if (typeof data !== "string") {
 49 |     const errorMsg = "Response data is not a string";
 50 |     ctx?.log?.("warn", errorMsg);
 51 |     return errorMsg;
 52 |   }
 53 |   return null;
 54 | }
 55 | 
 56 | function extractHtmlContent(
 57 |   dom: JSDOM,
 58 |   ctx: PuppeteerContext,
 59 | ): { title: string | null; textContent: string } {
 60 |   let title = dom.window.document.title ?? null;
 61 |   let textContent = "";
 62 | 
 63 |   // Try Readability first for better content extraction
 64 |   try {
 65 |     const reader = new Readability(dom.window.document);
 66 |     const article = reader.parse();
 67 | 
 68 |     if (article?.textContent && article.textContent.trim().length > 100) {
 69 |       title = article.title ?? title;
 70 |       textContent = article.textContent.trim();
 71 |       ctx?.log?.("info", `Readability extraction successful (${textContent.length} chars)`);
 72 |     } else {
 73 |       // Fallback to body text extraction
 74 |       textContent = dom.window.document.body?.textContent ?? "";
 75 |       ctx?.log?.("info", "Readability failed, using body text extraction");
 76 |     }
 77 |   } catch (readabilityError) {
 78 |     ctx?.log?.("warn", `Readability failed: ${readabilityError}, falling back to body text`);
 79 |     textContent = dom.window.document.body?.textContent ?? "";
 80 |   }
 81 | 
 82 |   return { title, textContent };
 83 | }
 84 | 
 85 | function extractContent(
 86 |   contentType: string,
 87 |   responseData: string,
 88 |   url: string,
 89 |   ctx: PuppeteerContext,
 90 | ): { title: string | null; textContent: string } {
 91 |   const dom = new JSDOM(responseData, { url });
 92 | 
 93 |   if (contentType.includes("html")) {
 94 |     return extractHtmlContent(dom, ctx);
 95 |   }
 96 | 
 97 |   // For non-HTML content, just get the text
 98 |   return { title: dom.window.document.title ?? null, textContent: responseData };
 99 | }
100 | 
/**
 * Normalizes extracted text: collapses whitespace, caps the length and rejects
 * results that are too short to be useful.
 *
 * @param textContent - Raw extracted text.
 * @param ctx - Context whose optional logger is told about truncation/rejection.
 * @returns `processedContent` on success, or `null` plus an `error` message when
 *          fewer than 50 characters remain after cleanup.
 */
function processTextContent(
  textContent: string,
  ctx: PuppeteerContext,
): { processedContent: string | null; error?: string } {
  const maxChars = 15000;
  const minChars = 50;

  // Collapse every whitespace run into a single space.
  const collapsed = textContent.replace(/\s+/g, " ").trim();
  const overLimit = collapsed.length > maxChars;

  const processed = overLimit
    ? `${collapsed.substring(0, maxChars)}... (content truncated)`
    : collapsed;
  if (overLimit) {
    ctx?.log?.("info", "Content truncated due to length");
  }

  if (processed.length >= minChars) {
    return { processedContent: processed };
  }

  const errorMsg = "Extracted content is too short to be meaningful";
  ctx?.log?.("warn", errorMsg);
  return { processedContent: null, error: errorMsg };
}
122 | 
/**
 * Builds a human-readable message for an Axios-style error.
 *
 * Precedence: HTTP status on `response` (4xx, 5xx, other), then the
 * network-level `code`, then the plain error message.
 *
 * @param axiosError - Error carrying optional `response` and `code` fields.
 * @returns A message describing the failure.
 */
function formatAxiosError(
  axiosError: Error & { response?: { status?: number; statusText?: string }; code?: string },
): string {
  if (axiosError.response?.status) {
    const status = axiosError.response.status;
    if (status >= 400 && status < 500) {
      return `Client error (${status}): ${axiosError.response.statusText ?? "Unknown error"}`;
    }
    if (status >= 500) {
      return `Server error (${status}): ${axiosError.response.statusText ?? "Unknown error"}`;
    }
    return `HTTP error (${status}): ${axiosError.response.statusText ?? "Unknown error"}`;
  }

  if (axiosError.code) {
    // Network errors
    switch (axiosError.code) {
      case "ECONNABORTED":
        return "Request timeout - server took too long to respond";
      case "ENOTFOUND":
        return "DNS resolution failed - domain not found";
      case "ECONNREFUSED":
        return "Connection refused - server is not accepting connections";
      case "ECONNRESET":
        return "Connection reset - network connection was interrupted";
      case "ETIMEDOUT":
        return "Connection timeout - failed to establish connection";
      default:
        return `Network error (${axiosError.code}): ${axiosError.message}`;
    }
  }

  return `Request failed: ${axiosError.message}`;
}

/**
 * Converts any thrown value into a user-facing error message.
 *
 * Non-`Error` values are stringified. Axios errors are delegated to
 * `formatAxiosError`. Everything else is classified by keywords in the message.
 *
 * @param error - The caught value.
 * @returns A message describing the failure.
 */
function formatErrorMessage(error: unknown): string {
  if (!(error instanceof Error)) {
    return `Unexpected error: ${String(error)}`;
  }

  const errorDetails = error.message;

  // Axios only sets `response` when the server actually replied; pure network
  // failures (connection reset, connect timeout, ...) carry just `code`. Accept
  // either field so those failures reach the code-specific messages too.
  if (error.name === "AxiosError" && ("response" in error || "code" in error)) {
    const axiosError = error as Error & {
      response?: { status?: number; statusText?: string };
      code?: string;
    };
    return formatAxiosError(axiosError);
  }

  if (errorDetails.includes("timeout")) {
    return "Request timeout - server took too long to respond";
  }
  if (errorDetails.includes("ENOTFOUND")) {
    return "DNS resolution failed - domain not found";
  }
  if (errorDetails.includes("ECONNREFUSED")) {
    return "Connection refused - server is not accepting connections";
  }

  return `Request failed: ${errorDetails}`;
}
185 | 
/**
 * Fetches a URL over plain HTTP and returns its readable text.
 *
 * Never throws: every failure (unsupported content type, non-string body,
 * too-short content, request error) is reported through the `error` field
 * with `textContent` set to `null`.
 *
 * @param url - Address to fetch.
 * @param ctx - Context providing the optional logger.
 * @returns Title and cleaned text on success, or an `error` message.
 */
export async function fetchSimpleContent(
  url: string,
  ctx: PuppeteerContext,
): Promise<{ title: string | null; textContent: string | null; error?: string }> {
  try {
    const response = await performHttpRequest(url, ctx);
    const contentType = response.headers["content-type"] ?? "";
    ctx?.log?.("info", `Content-Type: ${contentType}, Status: ${response.status}`);

    // Reject responses we cannot turn into text before doing any parsing.
    const typeProblem = validateContentType(contentType, ctx);
    if (typeProblem) {
      return { title: null, textContent: null, error: typeProblem };
    }
    const payloadProblem = validateResponseData(response.data, ctx);
    if (payloadProblem) {
      return { title: null, textContent: null, error: payloadProblem };
    }

    const extracted = extractContent(contentType, response.data, url, ctx);
    const title = extracted.title;
    const { processedContent, error: processingError } = processTextContent(
      extracted.textContent,
      ctx,
    );

    if (processingError ?? !processedContent) {
      return { title, textContent: null, error: processingError };
    }

    ctx?.log?.("info", `Simple fetch successful (${processedContent.length} chars)`);
    return { title, textContent: processedContent };
  } catch (error: unknown) {
    const errorMsg = formatErrorMessage(error);
    ctx?.log?.("error", `Simple fetch failed for ${url}: ${errorMsg}`);
    return { title: null, textContent: null, error: errorMsg };
  }
}
221 | 


--------------------------------------------------------------------------------
/src/tools/search.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Tool implementation for web search functionality with real streaming support
  3 |  */
  4 | 
  5 | import type { PuppeteerContext } from "../types/index.js";
  6 | 
  7 | /**
  8 |  * Handles web search with configurable detail levels and optional streaming
  9 |  */
 10 | export default async function search(
 11 |   args: {
 12 |     query: string;
 13 |     detail_level?: "brief" | "normal" | "detailed";
 14 |     stream?: boolean;
 15 |   },
 16 |   ctx: PuppeteerContext,
 17 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
 18 | ): Promise<string | AsyncGenerator<string, void, unknown>> {
 19 |   const { query, detail_level = "normal", stream = false } = args;
 20 | 
 21 |   let prompt = query;
 22 |   switch (detail_level) {
 23 |     case "brief":
 24 |       prompt = `Provide a brief, concise answer to: ${query}`;
 25 |       break;
 26 |     case "detailed":
 27 |       prompt = `Provide a comprehensive, detailed analysis of: ${query}. Include relevant examples, context, and supporting information where applicable.`;
 28 |       break;
 29 |     default:
 30 |       prompt = `Provide a clear, balanced answer to: ${query}. Include key points and relevant context.`;
 31 |   }
 32 | 
 33 |   // If streaming is not requested, return traditional response
 34 |   if (!stream) {
 35 |     return await performSearch(prompt, ctx);
 36 |   }
 37 | 
 38 |   // Return real streaming generator that monitors browser automation
 39 |   return realTimeStreamingSearch(prompt, ctx, performSearch);
 40 | }
 41 | 
 42 | // Helper functions for streaming search
 43 | async function* streamBrowserSetup(ctx: PuppeteerContext): AsyncGenerator<string, void, unknown> {
 44 |   yield "🌐 Initializing browser connection...\n";
 45 | 
 46 |   if (!ctx.browser || !ctx.page || ctx.page?.isClosed()) {
 47 |     yield "🔧 Setting up browser instance...\n";
 48 |   } else {
 49 |     yield "✅ Browser ready, navigating to Perplexity...\n";
 50 |   }
 51 | }
 52 | 
 53 | async function* streamSearchInitiation(prompt: string): AsyncGenerator<string, void, unknown> {
 54 |   yield "📡 Connecting to Perplexity AI...\n";
 55 |   yield `⌨️  Submitting query: "${prompt.substring(0, 100)}${prompt.length > 100 ? "..." : ""}"\n\n`;
 56 | }
 57 | 
 58 | async function* streamSearchExecution(
 59 |   prompt: string,
 60 |   ctx: PuppeteerContext,
 61 |   performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
 62 | ): AsyncGenerator<string, void, unknown> {
 63 |   let searchCompleted = false;
 64 |   let finalResult = "";
 65 | 
 66 |   // Monitor content while search is running
 67 |   const monitoringTask = monitorPageContent(ctx);
 68 | 
 69 |   // Start both search and monitoring
 70 |   const searchTask = performSearch(prompt, ctx).then((result) => {
 71 |     searchCompleted = true;
 72 |     finalResult = result;
 73 |     return result;
 74 |   });
 75 | 
 76 |   // Stream monitoring updates while search runs
 77 |   for await (const contentUpdate of monitoringTask) {
 78 |     if (searchCompleted) break;
 79 |     yield contentUpdate;
 80 |   }
 81 | 
 82 |   // Ensure search is complete
 83 |   await searchTask;
 84 | 
 85 |   if (finalResult) {
 86 |     yield* streamSearchResults(finalResult);
 87 |   }
 88 | }
 89 | 
 90 | async function* streamSearchResults(result: string): AsyncGenerator<string, void, unknown> {
 91 |   yield "\n\n📋 **Search Results:**\n\n";
 92 | 
 93 |   // Stream the final result in chunks for better UX
 94 |   const chunkSize = 300;
 95 |   for (let i = 0; i < result.length; i += chunkSize) {
 96 |     const chunk = result.slice(i, i + chunkSize);
 97 |     yield chunk;
 98 |     // Small delay to maintain streaming feel
 99 |     await new Promise<void>((resolve) => setTimeout(resolve, 50));
100 |   }
101 | }
102 | 
/**
 * Non-monitored path: emits a notice, then the whole answer as one chunk.
 *
 * @param prompt - Prompt to execute.
 * @param ctx - Browser context forwarded to `performSearch`.
 * @param performSearch - Function that executes the prompt and resolves to the answer.
 */
async function* streamFallbackSearch(
  prompt: string,
  ctx: PuppeteerContext,
  performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
): AsyncGenerator<string, void, unknown> {
  yield "⚠️  Streaming unavailable, falling back to standard search...\n\n";
  yield await performSearch(prompt, ctx);
}
112 | 
/**
 * Formats a failure notice for the streaming output.
 *
 * @param error - Caught value; its message is used when it is an `Error` with
 *                a non-empty message, otherwise "Unknown error".
 * @returns Text containing the failure line and a tip line.
 */
function formatStreamingError(error: unknown): string {
  let reason = "Unknown error";
  if (error instanceof Error && error.message) {
    reason = error.message;
  }

  const failureLine = `\n\n❌ **Search failed:** ${reason}\n`;
  const tipLine = "💡 **Tip:** Try a more specific query or check your connection.\n";
  return failureLine + tipLine;
}
117 | 
/**
 * Streaming search entry point: reports browser status, runs the search with
 * live monitoring when an open page exists (plain search otherwise) and ends
 * with a completion line.
 *
 * On failure a formatted error chunk is emitted and the error is rethrown.
 *
 * @param prompt - Prompt to execute.
 * @param ctx - Browser context.
 * @param performSearch - Function that executes the prompt and resolves to the answer.
 */
async function* realTimeStreamingSearch(
  prompt: string,
  ctx: PuppeteerContext,
  performSearch: (prompt: string, ctx: PuppeteerContext) => Promise<string>,
): AsyncGenerator<string, void, unknown> {
  yield "🔍 **Starting documentation search...**\n\n";

  try {
    yield* streamBrowserSetup(ctx);

    // Live monitoring needs an open page; without one use the plain search.
    const canMonitor = ctx.page ? !ctx.page.isClosed() : false;
    if (!canMonitor) {
      yield* streamFallbackSearch(prompt, ctx, performSearch);
    } else {
      yield* streamSearchInitiation(prompt);
      yield* streamSearchExecution(prompt, ctx, performSearch);
    }

    yield "\n\n✅ **Search completed successfully!**";
  } catch (error) {
    // Tell the consumer what went wrong, then let the caller handle it too.
    yield formatStreamingError(error);
    throw error;
  }
}
147 | 
148 | // Helper functions for content monitoring
/** Snapshot of the page state produced by the in-page content check script. */
interface ContentCheckResult {
  /** True when the matched answer elements contain any text. */
  hasContent: boolean;
  /** Combined `innerText` length of all matched answer elements. */
  contentLength: number;
  /** True when a textarea whose placeholder contains "Ask" is present. */
  hasInputField: boolean;
  /** Value of `document.readyState` at the time of the check. */
  pageState: string;
}
155 | 
/**
 * Builds the JavaScript source that is evaluated inside the page to measure
 * how much answer text is currently rendered.
 *
 * The script is wrapped in an immediately-invoked arrow function: a string
 * handed to `page.evaluate` is evaluated as an expression, where a bare
 * top-level `return` is a syntax error. The wrapper also keeps the `const`
 * declarations out of the page's global scope across repeated checks.
 *
 * @returns Source of an expression that evaluates to a `ContentCheckResult`-shaped object.
 */
function createContentCheck(): string {
  return `(() => {
    const proseElements = document.querySelectorAll(
      '.prose, [class*="prose"], [class*="answer"], [class*="result"]'
    );
    let totalLength = 0;

    for (const element of proseElements) {
      totalLength += (element.innerText?.length || 0);
    }

    return {
      hasContent: totalLength > 0,
      contentLength: totalLength,
      hasInputField: !!document.querySelector('textarea[placeholder*="Ask"]'),
      pageState: document.readyState,
    };
  })()`;
}
175 | 
/**
 * Evaluates the content-check script in the current page.
 *
 * @param ctx - Browser context holding the page.
 * @returns The script's result, or `null` when there is no open page or the
 *          evaluation throws.
 */
async function checkPageContent(ctx: PuppeteerContext): Promise<ContentCheckResult | null> {
  const pageUnavailable = !ctx.page || ctx.page.isClosed();
  if (pageUnavailable) {
    return null;
  }

  try {
    const snapshot = await ctx.page.evaluate(createContentCheck());
    return snapshot as ContentCheckResult;
  } catch {
    // Best effort: evaluation failures are reported as "no data".
    return null;
  }
}
185 | 
/**
 * Decides which progress line, if any, to emit for a content snapshot.
 *
 * @param contentCheck - Latest page snapshot.
 * @param lastContentLength - Content length at the time of the previous update.
 * @param startTime - Epoch milliseconds when monitoring started.
 * @returns A "waiting" line once the input field has been visible without any
 *          content for more than 2 seconds, a "loading" line when the content
 *          has grown, otherwise `null`.
 */
function generateProgressUpdate(
  contentCheck: ContentCheckResult,
  lastContentLength: number,
  startTime: number,
): string | null {
  const awaitingAnswer = contentCheck.hasInputField && !contentCheck.hasContent;
  if (awaitingAnswer) {
    const waitedLongEnough = Date.now() - startTime > 2000;
    return waitedLongEnough ? "⏳ Waiting for AI response...\n" : null;
  }

  const contentGrew =
    contentCheck.hasContent && contentCheck.contentLength > lastContentLength;
  if (!contentGrew) {
    return null;
  }

  const status = lastContentLength === 0 ? " (response started)" : " (updating)";
  return `📝 Content loading${status}...\n`;
}
201 | 
/**
 * Reports whether monitoring can stop early: the document has finished loading
 * and more than 200 characters of content are present.
 */
function shouldBreakMonitoring(contentCheck: ContentCheckResult): boolean {
  const { contentLength, pageState } = contentCheck;
  if (pageState !== "complete") {
    return false;
  }
  return contentLength > 200;
}
205 | 
/**
 * Polls the page for up to 10 seconds and yields progress lines as answer
 * content appears.
 *
 * Stops early once the content check reports a complete page with enough
 * text. Any error raised while monitoring is turned into a single notice so
 * that the search itself is not interrupted.
 *
 * @param ctx - Browser context holding the page to watch.
 */
async function* monitorPageContent(ctx: PuppeteerContext): AsyncGenerator<string, void, unknown> {
  if (!ctx.page || ctx.page.isClosed()) return;

  const pause = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

  try {
    const maxMonitoringTime = 10000; // upper bound on total monitoring time
    const startTime = Date.now();
    const deadline = startTime + maxMonitoringTime;
    let lastContentLength = 0;

    while (Date.now() < deadline) {
      const snapshot = await checkPageContent(ctx);

      if (!snapshot) {
        // Nothing readable yet; back off a little longer before retrying.
        await pause(1000);
        continue;
      }

      const progressLine = generateProgressUpdate(snapshot, lastContentLength, startTime);
      if (progressLine) {
        yield progressLine;
        lastContentLength = snapshot.contentLength;
      }

      if (shouldBreakMonitoring(snapshot)) {
        yield "🎯 Response ready, finalizing...\n";
        break;
      }

      await pause(500); // regular polling interval
    }
  } catch {
    // Monitoring failed, but don't break the main search
    yield "⚠️  Live monitoring unavailable, search continuing...\n";
  }
}
245 | 


--------------------------------------------------------------------------------
/src/utils/puppeteer-logic.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Pure business logic extracted from puppeteer utilities
  3 |  * These functions can be tested without mocking Puppeteer
  4 |  */
  5 | 
  6 | import type { ErrorAnalysis, RecoveryContext } from "../types/index.js";
  7 | 
  8 | /**
  9 |  * Determine recovery level based on error and context
 10 |  */
 11 | export function determineRecoveryLevel(error?: Error, context?: RecoveryContext): number {
 12 |   if (!error) return 1;
 13 | 
 14 |   const errorMsg = error.message.toLowerCase();
 15 | 
 16 |   // Critical errors require full restart
 17 |   if (
 18 |     errorMsg.includes("frame") ||
 19 |     errorMsg.includes("detached") ||
 20 |     errorMsg.includes("session closed") ||
 21 |     errorMsg.includes("target closed") ||
 22 |     errorMsg.includes("protocol error")
 23 |   ) {
 24 |     return 3; // Full restart
 25 |   }
 26 | 
 27 |   // Browser connectivity issues
 28 |   if (!context?.hasBrowser || !context?.isBrowserConnected) {
 29 |     return 3; // Full restart
 30 |   }
 31 | 
 32 |   // Page issues
 33 |   if (!context?.hasValidPage) {
 34 |     return 2; // New page
 35 |   }
 36 | 
 37 |   // Default to page refresh
 38 |   return 1;
 39 | }
 40 | 
 41 | /**
 42 |  * Analyze error characteristics
 43 |  */
 44 | export function analyzeError(error: Error | string): ErrorAnalysis {
 45 |   const errorMsg = typeof error === "string" ? error : error.message;
 46 |   const lowerMsg = errorMsg.toLowerCase();
 47 | 
 48 |   return {
 49 |     isTimeout: lowerMsg.includes("timeout") || lowerMsg.includes("timed out"),
 50 |     isNavigation: lowerMsg.includes("navigation") || lowerMsg.includes("Navigation"),
 51 |     isConnection:
 52 |       lowerMsg.includes("net::") || lowerMsg.includes("connection") || lowerMsg.includes("network"),
 53 |     isDetachedFrame:
 54 |       lowerMsg.includes("frame") ||
 55 |       lowerMsg.includes("detached") ||
 56 |       lowerMsg.includes("session closed"),
 57 |     isCaptcha: lowerMsg.includes("captcha") || lowerMsg.includes("challenge"),
 58 |     consecutiveTimeouts: 0, // This would be tracked externally
 59 |     consecutiveNavigationErrors: 0, // This would be tracked externally
 60 |   };
 61 | }
 62 | 
 63 | /**
 64 |  * Generate non-cryptographic jitter for retry delays
 65 |  * Note: Math.random() is safe here - only used for timing distribution, not security
 66 |  */
 67 | function generateRetryJitter(maxJitter: number): number {
 68 |   return Math.random() * maxJitter;
 69 | }
 70 | 
 71 | /**
 72 |  * Generate variable delay for connection errors to distribute load
 73 |  * Note: Math.random() is safe here - only used for timing distribution, not security
 74 |  */
 75 | function generateConnectionDelay(): number {
 76 |   return 15000 + Math.random() * 10000; // 15-25 seconds
 77 | }
 78 | 
 79 | /**
 80 |  * Generate variable delay for detached frame errors
 81 |  * Note: Math.random() is safe here - only used for timing distribution, not security
 82 |  */
 83 | function generateDetachedFrameDelay(): number {
 84 |   return 10000 + Math.random() * 5000; // 10-15 seconds
 85 | }
 86 | 
 87 | /**
 88 |  * Calculate retry delay with exponential backoff and jitter
 89 |  */
 90 | export function calculateRetryDelay(
 91 |   attemptNumber: number,
 92 |   errorAnalysis: ErrorAnalysis,
 93 |   maxDelay = 30000,
 94 | ): number {
 95 |   let baseDelay: number;
 96 | 
 97 |   if (errorAnalysis.isTimeout) {
 98 |     baseDelay = Math.min(5000 * (errorAnalysis.consecutiveTimeouts + 1), maxDelay);
 99 |   } else if (errorAnalysis.isNavigation) {
100 |     baseDelay = Math.min(8000 * (errorAnalysis.consecutiveNavigationErrors + 1), 40000);
101 |   } else if (errorAnalysis.isConnection) {
102 |     baseDelay = generateConnectionDelay();
103 |   } else if (errorAnalysis.isDetachedFrame) {
104 |     baseDelay = generateDetachedFrameDelay();
105 |   } else {
106 |     // Standard exponential backoff
107 |     baseDelay = Math.min(1000 * 2 ** attemptNumber, maxDelay);
108 |   }
109 | 
110 |   // Add jitter to prevent thundering herd problems
111 |   const maxJitter = Math.min(1000 * (attemptNumber + 1), 10000);
112 |   const jitter = generateRetryJitter(maxJitter);
113 | 
114 |   return baseDelay + jitter;
115 | }
116 | 
/**
 * Builds the Chromium launch arguments used by the browser manager.
 *
 * @param userAgent - User-agent string appended as `--user-agent=<value>`.
 * @returns The argument list, in a fixed order, ending with the user agent.
 */
export function generateBrowserArgs(userAgent: string): string[] {
  // Sandbox / security related switches
  const sandboxFlags = [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-dev-shm-usage",
    "--disable-web-security",
  ];

  // Anti-detection switches.
  // NOTE(review): `--disable-features` is passed twice below; confirm that both
  // feature lists take effect in the Chromium/Puppeteer version in use.
  const antiDetectionFlags = [
    "--disable-blink-features=AutomationControlled",
    "--disable-features=IsolateOrigins,site-per-process",
    "--disable-infobars",
    "--disable-notifications",
    "--disable-popup-blocking",
    "--disable-default-apps",
    "--disable-extensions",
    "--disable-translate",
    "--disable-sync",
    "--disable-background-networking",
    "--disable-client-side-phishing-detection",
    "--disable-component-update",
    "--disable-hang-monitor",
    "--disable-prompt-on-repost",
    "--disable-domain-reliability",
    "--disable-renderer-backgrounding",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-component-extensions-with-background-pages",
    "--disable-ipc-flooding-protection",
    "--disable-back-forward-cache",
    "--disable-partial-raster",
    "--disable-skia-runtime-opts",
    "--disable-smooth-scrolling",
    "--disable-features=site-per-process,TranslateUI,BlinkGenPropertyTrees",
    "--enable-features=NetworkService,NetworkServiceInProcess",
  ];

  // Performance and resource switches
  const performanceFlags = [
    "--disable-accelerated-2d-canvas",
    "--disable-gpu",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--mute-audio",
    "--no-first-run",
    "--no-default-browser-check",
    "--remote-debugging-port=0",
    "--use-mock-keychain",
  ];

  // Window size followed by the caller-supplied user agent
  const presentationFlags = ["--window-size=1280,720", `--user-agent=${userAgent}`];

  return [...sandboxFlags, ...antiDetectionFlags, ...performanceFlags, ...presentationFlags];
}
175 | 
/**
 * Returns the CSS selectors to try, most specific first, when locating the
 * search input.
 */
export function getSearchInputSelectors(): string[] {
  // Placeholder-based matches are tried before the structural fallbacks.
  const placeholderSelectors = ["Ask", "Search"].map(
    (hint) => `textarea[placeholder*="${hint}"]`,
  );
  const fallbackSelectors = ["textarea.w-full", 'textarea[rows="1"]', '[role="textbox"]', "textarea"];

  return [...placeholderSelectors, ...fallbackSelectors];
}
189 | 
/**
 * Returns the CSS selectors used to detect CAPTCHA and Cloudflare challenge
 * pages.
 */
export function getCaptchaSelectors(): string[] {
  const genericCaptcha = [
    '[class*="captcha"]',
    '[id*="captcha"]',
    'iframe[src*="captcha"]',
    'iframe[src*="recaptcha"]',
  ];

  const turnstile = ['iframe[src*="turnstile"]', '[class*="turnstile"]', '[id*="turnstile"]'];

  const challengePage = [
    "#challenge-running",
    "#challenge-form",
    ".challenge-running",
    ".challenge-form",
    '[class*="challenge"]',
    '[id*="challenge"]',
  ];

  const cloudflareElements = [
    ".cf-browser-verification",
    ".cf-checking-browser",
    ".cf-under-attack",
    "#cf-wrapper",
    ".cf-im-under-attack",
  ];

  const cloudflareExtras = [
    "[data-ray]", // Cloudflare Ray ID indicator
    ".ray-id",
    "#cf-error-details",
    ".cf-error-overview",
  ];

  const botDetection = [
    '[class*="bot-detection"]',
    '[class*="security-check"]',
    '[class*="verification"]',
  ];

  const rootChallenge = ['body[class*="challenge"]', 'html[class*="challenge"]'];

  return genericCaptcha.concat(
    turnstile,
    challengePage,
    cloudflareElements,
    cloudflareExtras,
    botDetection,
    rootChallenge,
  );
}
237 | 
/**
 * Checks that a URL is safe to navigate to.
 *
 * The URL must parse, use `http:` or `https:`, and, when `expectedDomain` is
 * given, its hostname must be that domain or one of its subdomains.
 *
 * @param url - Candidate URL.
 * @param expectedDomain - Optional domain the hostname must belong to
 *                         (for example `"perplexity.ai"`).
 * @returns `true` when the URL passes every check.
 */
export function validateNavigationUrl(url: string, expectedDomain?: string): boolean {
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
  } catch {
    return false;
  }

  if (parsedUrl.protocol !== "https:" && parsedUrl.protocol !== "http:") {
    return false;
  }

  if (!expectedDomain) {
    return true;
  }

  // Compare on a label boundary. A plain substring test would also accept
  // look-alike hosts such as "perplexity.ai.evil.com" or "notperplexity.ai".
  const hostname = parsedUrl.hostname.toLowerCase();
  const domain = expectedDomain.toLowerCase().replace(/^\.+/, "");
  return hostname === domain || hostname.endsWith(`.${domain}`);
}
254 | 
/**
 * Decides from the resulting URL whether a navigation failed.
 *
 * @param url - URL the page ended up on; empty or `"N/A"` counts as a failure.
 * @param expectedUrl - Optional URL whose hostname the page should be on.
 * @returns `true` when the URL is missing, either URL cannot be parsed, or the
 *          hostnames differ; `false` otherwise.
 */
export function isNavigationFailure(url: string, expectedUrl?: string): boolean {
  const urlMissing = !url || url === "N/A";
  if (urlMissing) {
    return true;
  }

  if (!expectedUrl) {
    return false;
  }

  try {
    const actualHost = new URL(url).hostname;
    const expectedHost = new URL(expectedUrl).hostname;
    return actualHost !== expectedHost;
  } catch {
    // An unparsable URL on either side is treated as a failed navigation.
    return true;
  }
}
273 | 


--------------------------------------------------------------------------------
/src/server/PerplexityServer.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * PerplexityServer - Modular, testable architecture
  3 |  * Uses dependency injection and focused modules for better testability
  4 |  */
  5 | import { Server } from "@modelcontextprotocol/sdk/server/index.js";
  6 | import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
  7 | import type {
  8 |   IBrowserManager,
  9 |   IDatabaseManager,
 10 |   ISearchEngine,
 11 |   ServerDependencies,
 12 | } from "../types/index.js";
 13 | import { logError, logInfo } from "../utils/logging.js";
 14 | import { BrowserManager } from "./modules/BrowserManager.js";
 15 | import { DatabaseManager } from "./modules/DatabaseManager.js";
 16 | import { SearchEngine } from "./modules/SearchEngine.js";
 17 | import { createToolHandlersRegistry, setupToolHandlers } from "./toolHandlerSetup.js";
 18 | 
 19 | // Import modular tool implementations
 20 | import chatPerplexity from "../tools/chatPerplexity.js";
 21 | import extractUrlContent from "../tools/extractUrlContent.js";
 22 | 
 23 | export class PerplexityServer {
 24 |   private readonly server: Server;
 25 |   private readonly browserManager: IBrowserManager;
 26 |   private readonly searchEngine: ISearchEngine;
 27 |   private readonly databaseManager: IDatabaseManager;
 28 | 
 29 |   constructor(dependencies?: ServerDependencies) {
 30 |     try {
 31 |       // Initialize MCP Server
 32 |       this.server = new Server(
 33 |         { name: "perplexity-server", version: "0.2.0" },
 34 |         {
 35 |           capabilities: {
 36 |             tools: {
 37 |               listChanged: true,
 38 |             },
 39 |           },
 40 |         },
 41 |       );
 42 | 
 43 |       // Initialize modules with dependency injection
 44 |       this.databaseManager = dependencies?.databaseManager ?? new DatabaseManager();
 45 |       this.browserManager = dependencies?.browserManager ?? new BrowserManager();
 46 |       this.searchEngine = dependencies?.searchEngine ?? new SearchEngine(this.browserManager);
 47 | 
 48 |       // Initialize database
 49 |       this.databaseManager.initialize();
 50 | 
 51 |       // Setup tool handlers
 52 |       this.setupToolHandlers();
 53 | 
 54 |       // Setup graceful shutdown (only if not in MCP mode and not in test mode)
 55 |       // biome-ignore lint/complexity/useLiteralKeys: Environment variable access
 56 |       if (!process.env["MCP_MODE"] && !process.env["VITEST"]) {
 57 |         this.setupShutdownHandler();
 58 |       }
 59 | 
 60 |       logInfo("PerplexityServer initialized successfully");
 61 |     } catch (error) {
 62 |       logError("Error in PerplexityServer constructor:", {
 63 |         error: error instanceof Error ? error.message : String(error),
 64 |         stack: error instanceof Error ? error.stack : undefined,
 65 |       });
 66 |       throw error;
 67 |     }
 68 |   }
 69 | 
 70 |   private setupShutdownHandler(): void {
 71 |     process.on("SIGINT", async () => {
 72 |       logInfo("SIGINT received, shutting down gracefully...");
 73 |       try {
 74 |         await this.cleanup();
 75 |         await this.server.close();
 76 |         process.exit(0);
 77 |       } catch (error) {
 78 |         logError("Error during shutdown:", {
 79 |           error: error instanceof Error ? error.message : String(error),
 80 |         });
 81 |         process.exit(1);
 82 |       }
 83 |     });
 84 |   }
 85 | 
 86 |   private async cleanup(): Promise<void> {
 87 |     try {
 88 |       await this.browserManager.cleanup();
 89 |       this.databaseManager.close();
 90 |       logInfo("Server cleanup completed");
 91 |     } catch (error) {
 92 |       logError("Error during cleanup:", {
 93 |         error: error instanceof Error ? error.message : String(error),
 94 |       });
 95 |     }
 96 |   }
 97 | 
 98 |   // Tool handler implementations
 99 |   private async handleChatPerplexity(args: Record<string, unknown>): Promise<string> {
100 |     const typedArgs = args as { message: string; chat_id?: string };
101 | 
102 |     // Use modular search engine
103 |     const searchResult = await this.searchEngine.performSearch(typedArgs.message);
104 | 
105 |     // Use modular database manager
106 |     const getChatHistoryFn = (chatId: string) => this.databaseManager.getChatHistory(chatId);
107 |     const saveChatMessageFn = (
108 |       chatId: string,
109 |       message: { role: "user" | "assistant"; content: string },
110 |     ) => this.databaseManager.saveChatMessage(chatId, message.role, message.content);
111 | 
112 |     // Call the original tool implementation with injected dependencies
113 |     return await chatPerplexity(
114 |       typedArgs,
115 |       {} as never, // Context not needed with modular approach
116 |       () => Promise.resolve(searchResult),
117 |       getChatHistoryFn,
118 |       saveChatMessageFn,
119 |     );
120 |   }
121 | 
122 |   private async handleGetDocumentation(args: Record<string, unknown>): Promise<string> {
123 |     const typedArgs = args as { query: string; context?: string };
124 |     const searchResult = await this.searchEngine.performSearch(
125 |       `Documentation for ${typedArgs.query}: ${typedArgs.context || ""}`,
126 |     );
127 |     return searchResult;
128 |   }
129 | 
130 |   private async handleFindApis(args: Record<string, unknown>): Promise<string> {
131 |     const typedArgs = args as { requirement: string; context?: string };
132 |     const searchResult = await this.searchEngine.performSearch(
133 |       `Find APIs for ${typedArgs.requirement}: ${typedArgs.context || ""}`,
134 |     );
135 |     return searchResult;
136 |   }
137 | 
138 |   private async handleCheckDeprecatedCode(args: Record<string, unknown>): Promise<string> {
139 |     const typedArgs = args as { code: string; technology?: string };
140 |     const searchResult = await this.searchEngine.performSearch(
141 |       `Check if this ${typedArgs.technology || "code"} is deprecated: ${typedArgs.code}`,
142 |     );
143 |     return searchResult;
144 |   }
145 | 
146 |   private async handleSearch(args: Record<string, unknown>): Promise<string> {
147 |     const typedArgs = args as {
148 |       query: string;
149 |       detail_level?: "brief" | "normal" | "detailed";
150 |       stream?: boolean;
151 |     };
152 | 
153 |     return await this.searchEngine.performSearch(typedArgs.query);
154 |   }
155 | 
156 |   private async handleExtractUrlContent(args: Record<string, unknown>): Promise<string> {
157 |     const typedArgs = args as { url: string; depth?: number };
158 | 
159 |     // Ensure browser is initialized
160 |     if (!this.browserManager.isReady()) {
161 |       await this.browserManager.initialize();
162 |     }
163 | 
164 |     // Create PuppeteerContext from BrowserManager
165 |     const ctx = this.createPuppeteerContext();
166 | 
167 |     return await extractUrlContent(typedArgs, ctx);
168 |   }
169 | 
170 |   private createPuppeteerContext() {
171 |     const browserManager = this.browserManager as any; // Access the getPuppeteerContext method
172 |     return browserManager.getPuppeteerContext();
173 |   }
174 | 
175 |   private setupToolHandlers(): void {
176 |     const toolHandlers = createToolHandlersRegistry({
177 |       chat_perplexity: this.handleChatPerplexity.bind(this),
178 |       get_documentation: this.handleGetDocumentation.bind(this),
179 |       find_apis: this.handleFindApis.bind(this),
180 |       check_deprecated_code: this.handleCheckDeprecatedCode.bind(this),
181 |       search: this.handleSearch.bind(this),
182 |       extract_url_content: this.handleExtractUrlContent.bind(this),
183 |     });
184 | 
185 |     setupToolHandlers(this.server, toolHandlers);
186 |   }
187 | 
188 |   /** Connects the server to a stdio transport; on any failure, logs it and exits with status 1. */
189 |   async run(): Promise<void> {
190 |     try {
191 |       logInfo("Creating StdioServerTransport...");
192 |       const stdio = new StdioServerTransport();
193 |       logInfo("Starting PerplexityServer...");
194 |       const toolNames = Object.keys(this.getToolHandlersRegistry()).join(", ");
195 |       logInfo(`Tools registered: ${toolNames}`);
196 | 
197 |       logInfo("Attempting to connect server to transport...");
198 |       await this.server.connect(stdio);
199 |       logInfo("PerplexityServer connected and ready");
200 |       logInfo("Server is listening for requests...");
201 |       process.stdin.resume(); // resumed so the process stays alive after startup
202 |     } catch (error) {
203 |       const failure = error instanceof Error ? error : undefined;
204 |       logError("Failed to start server:", {
205 |         error: failure ? failure.message : String(error),
206 |         stack: failure?.stack,
207 |       });
208 |       process.exit(1);
209 |     }
210 |   }
211 | 
212 |   private getToolHandlersRegistry() { // tool name -> handler method bound to this instance
213 |     return { // a new object (and new bound functions) on every call
214 |       chat_perplexity: this.handleChatPerplexity.bind(this),
215 |       get_documentation: this.handleGetDocumentation.bind(this),
216 |       find_apis: this.handleFindApis.bind(this),
217 |       check_deprecated_code: this.handleCheckDeprecatedCode.bind(this),
218 |       search: this.handleSearch.bind(this),
219 |       extract_url_content: this.handleExtractUrlContent.bind(this),
220 |     };
221 |   }
222 | 
223 |   // Getters for testing
224 |   public getBrowserManager(): IBrowserManager { // same object the handlers use, not a copy
225 |     return this.browserManager;
226 |   }
227 | 
228 |   public getSearchEngine(): ISearchEngine { // same object the search handlers call, not a copy
229 |     return this.searchEngine;
230 |   }
231 | 
232 |   public getDatabaseManager(): IDatabaseManager { // returns the field as-is, not a copy
233 |     return this.databaseManager;
234 |   }
235 | }
236 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/database.test.ts:
--------------------------------------------------------------------------------
  1 | import { existsSync, mkdirSync } from "node:fs";
  2 | import { join } from "node:path";
  3 | import { beforeEach, describe, expect, it, vi } from "vitest";
  4 | import type { ChatMessage } from "../../types/index.js";
  5 | import { DatabaseManager } from "../../server/modules/DatabaseManager.js";
  6 | 
  7 | // Replace node:fs with bare spies so each test decides what existsSync/mkdirSync do.
  8 | vi.mock("node:fs", () => ({
  9 |   existsSync: vi.fn(),
 10 |   mkdirSync: vi.fn(),
 11 | }));
 12 | 
 13 | // Mock node:path. The dirname spy is created through vi.hoisted so the mock factory below can use it.
 14 | const { mockDirname } = vi.hoisted(() => ({
 15 |   mockDirname: vi.fn(),
 16 | }));
 17 | 
 18 | vi.mock("node:path", () => ({
 19 |   dirname: mockDirname,
 20 |   join: vi.fn(),
 21 | }));
 22 | 
 23 | // Mock node:url so fileURLToPath always returns the same fake module path.
 24 | vi.mock("node:url", () => ({
 25 |   fileURLToPath: vi.fn().mockReturnValue("/mock/path/to/module.js"),
 26 | }));
 27 | 
 28 | // Mock bun:sqlite. prepare() and query() on every Database return the one shared statement stub.
 29 | vi.mock("bun:sqlite", () => {
 30 |   const mockStmt = {
 31 |     all: vi.fn(),
 32 |     run: vi.fn(),
 33 |   };
 34 | 
 35 |   return {
 36 |     Database: vi.fn().mockImplementation(() => ({
 37 |       close: vi.fn(),
 38 |       exec: vi.fn(),
 39 |       prepare: vi.fn().mockReturnValue(mockStmt),
 40 |       query: vi.fn().mockReturnValue(mockStmt),
 41 |     })),
 42 |   };
 43 | });
 44 | 
 45 | // Replace the db helper functions with bare spies.
 46 | vi.mock("../../utils/db.js", () => ({
 47 |   initializeDatabase: vi.fn(),
 48 |   getChatHistory: vi.fn(),
 49 |   saveChatMessage: vi.fn(),
 50 | }));
 51 | 
 52 | // Replace the logging functions with bare spies so log calls can be asserted.
 53 | vi.mock("../../utils/logging.js", () => ({
 54 |   logInfo: vi.fn(),
 55 |   logWarn: vi.fn(),
 56 |   logError: vi.fn(),
 57 | }));
 58 | 
 59 | import * as dbUtils from "../../utils/db.js";
 60 | import * as logging from "../../utils/logging.js";
 61 | // Typed handles on the mocked functions, used below for stubbing and assertions.
 62 | const mockExistsSync = vi.mocked(existsSync);
 63 | const mockMkdirSync = vi.mocked(mkdirSync);
 64 | const mockJoin = vi.mocked(join);
 65 | const mockInitializeDatabase = vi.mocked(dbUtils.initializeDatabase);
 66 | const mockGetChatHistory = vi.mocked(dbUtils.getChatHistory);
 67 | const mockSaveChatMessage = vi.mocked(dbUtils.saveChatMessage);
 68 | const mockLogInfo = vi.mocked(logging.logInfo);
 69 | const mockLogError = vi.mocked(logging.logError);
 70 | 
 71 | /**
 72 |  * DatabaseManager suite. Every collaborator (node:fs, node:path, the db helpers, logging) is a
 73 |  * module-level mock, so each test only checks how the manager drives those mocks.
 74 |  */
 75 | describe("DatabaseManager", () => {
 76 |   let manager: DatabaseManager;
 77 | 
 78 |   // Arranges the mocks for the case where the database location already exists.
 79 |   const stubExistingLocation = (): void => {
 80 |     mockJoin.mockReturnValue("/mock/path/to/chat_history.db");
 81 |     mockExistsSync.mockReturnValue(true);
 82 |     mockInitializeDatabase.mockImplementation(() => {});
 83 |   };
 84 | 
 85 |   // Same arrangement, then runs initialize() on the shared manager.
 86 |   const initializeManager = (): void => {
 87 |     stubExistingLocation();
 88 |     manager.initialize();
 89 |   };
 90 | 
 91 |   // Fresh manager and clean mock call history for every test.
 92 |   beforeEach(() => {
 93 |     vi.clearAllMocks();
 94 |     manager = new DatabaseManager();
 95 |   });
 96 | 
 97 |   describe("constructor", () => {
 98 |     it("should create instance with default path", () => {
 99 |       expect(new DatabaseManager()).toBeInstanceOf(DatabaseManager);
100 |     });
101 | 
102 |     it("should create instance with custom path", () => {
103 |       expect(new DatabaseManager("/custom/path/to/db.sqlite")).toBeInstanceOf(DatabaseManager);
104 |     });
105 |   });
106 | 
107 |   describe("initialize", () => {
108 |     beforeEach(() => {
109 |       stubExistingLocation();
110 |     });
111 | 
112 |     it("should initialize successfully with existing directory", () => {
113 |       mockExistsSync.mockReturnValue(true);
114 | 
115 |       manager.initialize();
116 | 
117 |       expect(mockJoin).toHaveBeenCalled();
118 |       expect(mockInitializeDatabase).toHaveBeenCalled();
119 |       expect(mockLogInfo).toHaveBeenCalledWith("DatabaseManager initialized successfully");
120 |       expect(manager.isInitialized()).toBe(true);
121 |     });
122 | 
123 |     it("should create directory if it doesn't exist", () => {
124 |       const parentDir = "/mock/path/to";
125 |       mockExistsSync.mockReturnValue(false);
126 |       mockDirname.mockReturnValue(parentDir);
127 | 
128 |       manager.initialize();
129 | 
130 |       expect(mockMkdirSync).toHaveBeenCalledWith(parentDir, { recursive: true });
131 |       expect(mockLogInfo).toHaveBeenCalledWith(`Created database directory: ${parentDir}`);
132 |     });
133 | 
134 |     it("should use custom database path when provided", () => {
135 |       // Construction only: initialize() is not called on the custom-path manager.
136 |       expect(new DatabaseManager("/custom/db/path.sqlite")).toBeInstanceOf(DatabaseManager);
137 |     });
138 | 
139 |     it("should handle initialization errors", () => {
140 |       const failure = new Error("Database initialization failed");
141 |       mockInitializeDatabase.mockImplementationOnce(() => {
142 |         throw failure;
143 |       });
144 | 
145 |       expect(() => manager.initialize()).toThrow("Database initialization failed");
146 |       expect(mockLogError).toHaveBeenCalledWith(
147 |         "DatabaseManager initialization failed:",
148 |         expect.any(Object),
149 |       );
150 |     });
151 |   });
152 | 
153 |   describe("getChatHistory", () => {
154 |     beforeEach(() => {
155 |       initializeManager();
156 |     });
157 | 
158 |     it("should get chat history successfully", () => {
159 |       const stored: ChatMessage[] = [
160 |         { role: "user", content: "Hello" },
161 |         { role: "assistant", content: "Hi there!" },
162 |       ];
163 |       mockGetChatHistory.mockReturnValue(stored);
164 | 
165 |       const history = manager.getChatHistory("test-chat-123");
166 | 
167 |       expect(history).toEqual(stored);
168 |       expect(mockGetChatHistory).toHaveBeenCalledWith(expect.anything(), "test-chat-123");
169 |     });
170 | 
171 |     it("should throw error when not initialized", () => {
172 |       const fresh = new DatabaseManager();
173 |       expect(() => fresh.getChatHistory("test-chat")).toThrow("Database not initialized");
174 |     });
175 | 
176 |     it("should throw error when chat ID is not provided", () => {
177 |       expect(() => manager.getChatHistory("")).toThrow("Chat ID is required");
178 |     });
179 |   });
180 | 
181 |   describe("saveChatMessage", () => {
182 |     beforeEach(() => {
183 |       initializeManager();
184 |     });
185 | 
186 |     it("should save user message successfully", () => {
187 |       const chatId = "test-chat-123";
188 |       const message = {
189 |         role: "user",
190 |         content: "Hello, how are you?",
191 |       } as const;
192 | 
193 |       manager.saveChatMessage(chatId, message.role, message.content);
194 | 
195 |       expect(mockSaveChatMessage).toHaveBeenCalledWith(expect.anything(), chatId, {
196 |         role: message.role,
197 |         content: message.content,
198 |       });
199 |       expect(mockLogInfo).toHaveBeenCalledWith(`Saved ${message.role} message for chat ${chatId}`);
200 |     });
201 | 
202 |     it("should save assistant message successfully", () => {
203 |       const chatId = "test-chat-456";
204 |       const message = {
205 |         role: "assistant",
206 |         content: "I'm doing well, thank you!",
207 |       } as const;
208 | 
209 |       manager.saveChatMessage(chatId, message.role, message.content);
210 | 
211 |       expect(mockSaveChatMessage).toHaveBeenCalledWith(expect.anything(), chatId, {
212 |         role: message.role,
213 |         content: message.content,
214 |       });
215 |       expect(mockLogInfo).toHaveBeenCalledWith(`Saved ${message.role} message for chat ${chatId}`);
216 |     });
217 | 
218 |     it("should throw error when not initialized", () => {
219 |       const fresh = new DatabaseManager();
220 |       expect(() => fresh.saveChatMessage("chat", "user", "message")).toThrow(
221 |         "Database not initialized",
222 |       );
223 |     });
224 |   });
225 | 
226 |   describe("close", () => {
227 |     it("should close database connection successfully", () => {
228 |       initializeManager();
229 | 
230 |       manager.close();
231 | 
232 |       expect(mockLogInfo).toHaveBeenCalledWith("Database connection closed successfully");
233 |       expect(manager.isInitialized()).toBe(false);
234 |     });
235 | 
236 |     it("should handle close when not initialized", () => {
237 |       // Passes as long as close() on a never-initialized manager does not throw.
238 |       manager.close();
239 |     });
240 |   });
241 | 
242 |   describe("isInitialized", () => {
243 |     it("should return false when not initialized", () => {
244 |       // The manager from the outer beforeEach has never had initialize() called.
245 |       expect(manager.isInitialized()).toBe(false);
246 |     });
247 | 
248 |     it("should return true when properly initialized", () => {
249 |       initializeManager();
250 | 
251 |       expect(manager.isInitialized()).toBe(true);
252 |     });
253 |   });
254 | });
255 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/puppeteer-logic.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it } from "vitest";
  2 | 
  3 | describe("Puppeteer Logic Utilities", () => {
  4 |   // Every test loads the module on demand through this one dynamic import.
  5 |   const loadLogic = () => import("../../utils/puppeteer-logic.js");
  6 | 
  7 |   describe("Error Analysis Functions", () => {
  8 |     it("should detect timeout errors", async () => {
  9 |       const { analyzeError } = await loadLogic();
 10 | 
 11 |       const verdict = analyzeError(new Error("Navigation timeout of 30000 ms exceeded"));
 12 | 
 13 |       expect(verdict.isTimeout).toBe(true);
 14 |     });
 15 | 
 16 |     it("should detect navigation errors", async () => {
 17 |       const { analyzeError } = await loadLogic();
 18 | 
 19 |       // The title says "navigation", but the assertion is on the isConnection flag.
 20 |       const verdict = analyzeError(new Error("net::ERR_NAME_NOT_RESOLVED"));
 21 |       expect(verdict.isConnection).toBe(true);
 22 |     });
 23 | 
 24 |     it("should detect connection errors", async () => {
 25 |       const { analyzeError } = await loadLogic();
 26 | 
 27 |       const verdict = analyzeError(new Error("net::ERR_CONNECTION_REFUSED"));
 28 | 
 29 |       expect(verdict.isConnection).toBe(true);
 30 |     });
 31 | 
 32 |     it("should detect detached frame errors", async () => {
 33 |       const { analyzeError } = await loadLogic();
 34 |       const detached = new Error("Execution context was destroyed, detached frame");
 35 | 
 36 |       const { isDetachedFrame } = analyzeError(detached);
 37 | 
 38 |       expect(isDetachedFrame).toBe(true);
 39 |     });
 40 | 
 41 |     it("should detect CAPTCHA errors", async () => {
 42 |       const { analyzeError } = await loadLogic();
 43 |       const challenge = new Error("CAPTCHA challenge detected");
 44 | 
 45 |       const { isCaptcha } = analyzeError(challenge);
 46 | 
 47 |       expect(isCaptcha).toBe(true);
 48 |     });
 49 | 
 50 |     it("should handle string errors", async () => {
 51 |       const { analyzeError } = await loadLogic();
 52 | 
 53 |       const verdict = analyzeError("Random error message");
 54 |       // Only the shape is checked: each flag must be a boolean, whatever its value.
 55 |       const flags = [
 56 |         "isTimeout", "isNavigation", "isConnection", "isDetachedFrame", "isCaptcha",
 57 |       ] as const;
 58 |       for (const flag of flags) {
 59 |         expect(typeof verdict[flag]).toBe("boolean");
 60 |       }
 61 |     });
 62 |   });
 63 | 
 64 |   describe("Recovery Level Determination", () => {
 65 |     it("should determine level 1 recovery for minor errors", async () => {
 66 |       const { determineRecoveryLevel } = await loadLogic();
 67 |       // Valid page, connected browser, no prior failures.
 68 |       const browserState = {
 69 |         hasValidPage: true,
 70 |         hasBrowser: true,
 71 |         isBrowserConnected: true,
 72 |         operationCount: 0,
 73 |         consecutiveTimeouts: 0,
 74 |         consecutiveNavigationErrors: 0,
 75 |       };
 76 | 
 77 |       const level = determineRecoveryLevel(new Error("Minor error"), browserState);
 78 | 
 79 |       expect(level).toBe(1);
 80 |     });
 81 | 
 82 |     it("should determine level 2 recovery for page issues", async () => {
 83 |       const { determineRecoveryLevel } = await loadLogic();
 84 |       // Same state as the level 1 case except that the page is not valid.
 85 |       const browserState = {
 86 |         hasValidPage: false,
 87 |         hasBrowser: true,
 88 |         isBrowserConnected: true,
 89 |         operationCount: 0,
 90 |         consecutiveTimeouts: 0,
 91 |         consecutiveNavigationErrors: 0,
 92 |       };
 93 | 
 94 |       const level = determineRecoveryLevel(new Error("Page error"), browserState);
 95 | 
 96 |       expect(level).toBe(2);
 97 |     });
 98 | 
 99 |     it("should determine level 3 recovery for critical errors", async () => {
100 |       const { determineRecoveryLevel } = await loadLogic();
101 | 
102 |       // No context object is passed in this case.
103 |       const level = determineRecoveryLevel(new Error("Frame detached error"));
104 | 
105 |       expect(level).toBe(3);
106 |     });
107 | 
108 |     it("should determine recovery level based on context", async () => {
109 |       const { determineRecoveryLevel } = await loadLogic();
110 | 
111 |       // No error is supplied here, so the level is expected to come from the context.
112 |       const browserState = {
113 |         hasValidPage: false,
114 |         hasBrowser: true,
115 |         isBrowserConnected: true,
116 |         operationCount: 5,
117 |         consecutiveTimeouts: 0,
118 |         consecutiveNavigationErrors: 0,
119 |       };
120 |       const level = determineRecoveryLevel(undefined, browserState);
121 | 
122 |       // The exact level is left open; it only has to be a number from 1 to 3.
123 |       expect(typeof level).toBe("number");
124 |       expect(level).toBeGreaterThanOrEqual(1);
125 |       expect(level).toBeLessThanOrEqual(3);
126 |     });
127 |   });
128 | 
129 |   describe("Retry Delay Calculation", () => {
130 |     it("should calculate basic retry delay", async () => {
131 |       const { calculateRetryDelay } = await loadLogic();
132 |       // No error flags set and no consecutive failures recorded.
133 |       const noFailures = {
134 |         isTimeout: false,
135 |         isNavigation: false,
136 |         isConnection: false,
137 |         isDetachedFrame: false,
138 |         isCaptcha: false,
139 |         consecutiveTimeouts: 0,
140 |         consecutiveNavigationErrors: 0,
141 |       };
142 | 
143 |       const delay = calculateRetryDelay(1, noFailures);
144 | 
145 |       expect(typeof delay).toBe("number");
146 |       expect(delay).toBeGreaterThan(0);
147 |     });
148 | 
149 |     it("should calculate increased delay for consecutive timeouts", async () => {
150 |       const { calculateRetryDelay } = await loadLogic();
151 | 
152 |       const baseline = {
153 |         isTimeout: false,
154 |         isNavigation: false,
155 |         isConnection: false,
156 |         isDetachedFrame: false,
157 |         isCaptcha: false,
158 |         consecutiveTimeouts: 0,
159 |         consecutiveNavigationErrors: 0,
160 |       };
161 |       // Differs from the baseline only in isTimeout and consecutiveTimeouts.
162 |       const afterTimeout = {
163 |         isTimeout: true,
164 |         isNavigation: false,
165 |         isConnection: false,
166 |         isDetachedFrame: false,
167 |         isCaptcha: false,
168 |         consecutiveTimeouts: 1,
169 |         consecutiveNavigationErrors: 0,
170 |       };
171 | 
172 |       const baselineDelay = calculateRetryDelay(1, baseline);
173 |       const timeoutDelay = calculateRetryDelay(1, afterTimeout);
174 | 
175 |       expect(timeoutDelay).toBeGreaterThan(baselineDelay);
176 |     });
177 | 
178 |     it("should increase delay with attempt number", async () => {
179 |       const { calculateRetryDelay } = await loadLogic();
180 |       // The same analysis is used for both calls; only the attempt number changes.
181 |       const noFailures = {
182 |         isTimeout: false,
183 |         isNavigation: false,
184 |         isConnection: false,
185 |         isDetachedFrame: false,
186 |         isCaptcha: false,
187 |         consecutiveTimeouts: 0,
188 |         consecutiveNavigationErrors: 0,
189 |       };
190 | 
191 |       const firstAttempt = calculateRetryDelay(1, noFailures);
192 |       const thirdAttempt = calculateRetryDelay(3, noFailures);
193 | 
194 |       expect(thirdAttempt).toBeGreaterThan(firstAttempt);
195 |     });
196 |   });
197 | 
198 |   describe("Browser Argument Generation", () => {
199 |     it("should generate browser arguments with user agent", async () => {
200 |       const { generateBrowserArgs } = await loadLogic();
201 |       const agent = "test-user-agent";
202 | 
203 |       const flags = generateBrowserArgs(agent);
204 | 
205 |       expect(Array.isArray(flags)).toBe(true);
206 |       expect(flags.length).toBeGreaterThan(0);
207 |       expect(flags).toContain(`--user-agent=${agent}`);
208 |     });
209 | 
210 |     it("should include essential browser arguments", async () => {
211 |       const { generateBrowserArgs } = await loadLogic();
212 | 
213 |       const flags = generateBrowserArgs("test-agent");
214 |       // Sandbox and /dev/shm related flags that the suite requires to be present.
215 |       expect(flags).toContain("--no-sandbox");
216 |       expect(flags).toContain("--disable-setuid-sandbox");
217 |       expect(flags).toContain("--disable-dev-shm-usage");
218 |     });
219 |   });
220 | 
221 |   describe("URL Validation", () => {
222 |     it("should validate navigation URLs", async () => {
223 |       const { validateNavigationUrl: accepts } = await loadLogic();
224 | 
225 |       expect(accepts("https://www.perplexity.ai/")).toBe(true);
226 |       expect(accepts("https://perplexity.ai/")).toBe(true);
227 |       expect(accepts("http://example.com")).toBe(true);
228 |     });
229 | 
230 |     it("should reject invalid URLs", async () => {
231 |       const { validateNavigationUrl: accepts } = await loadLogic();
232 | 
233 |       expect(accepts("")).toBe(false);
234 |       expect(accepts("invalid-url")).toBe(false);
235 |       expect(accepts("javascript:alert(1)")).toBe(false);
236 |     });
237 | 
238 |     it("should validate navigation failures", async () => {
239 |       const { isNavigationFailure } = await loadLogic();
240 |       const home = "https://www.perplexity.ai/";
241 | 
242 |       // Same URL in both positions: not a failure.
243 |       expect(isNavigationFailure(home, home)).toBe(false);
244 | 
245 |       // A different domain in the second position: a failure.
246 |       expect(isNavigationFailure(home, "https://wrong-domain.com/")).toBe(true);
247 |     });
248 |   });
249 | 
250 |   describe("Selector Functions", () => {
251 |     it("should provide search input selectors", async () => {
252 |       const { getSearchInputSelectors } = await loadLogic();
253 | 
254 |       const candidates = getSearchInputSelectors();
255 | 
256 |       expect(Array.isArray(candidates)).toBe(true);
257 |       expect(candidates.length).toBeGreaterThan(0);
258 |       expect(typeof candidates[0]).toBe("string");
259 |     });
260 | 
261 |     it("should provide CAPTCHA selectors", async () => {
262 |       const { getCaptchaSelectors } = await loadLogic();
263 | 
264 |       const candidates = getCaptchaSelectors();
265 | 
266 |       expect(Array.isArray(candidates)).toBe(true);
267 |       expect(typeof candidates[0]).toBe("string");
268 |     });
269 |   });
270 | });
271 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/db.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it, vi } from "vitest";
  2 | import type { Database } from "bun:sqlite";
  3 | import type { ChatMessage } from "../../types/index.js";
  4 | import * as dbModule from "../../utils/db.js";
  5 | 
  6 | // Replace the logging functions with bare spies for this suite.
  7 | vi.mock("../../utils/logging.js", () => ({
  8 |   logInfo: vi.fn(),
  9 |   logWarn: vi.fn(),
 10 |   logError: vi.fn(),
 11 | }));
 12 | 
 13 | describe("Database Utilities", () => {
 14 |   // Create mock database implementation
 15 |   const createMockDatabase = () => {
 16 |     const tables: Record<string, any[]> = {};
 17 | 
 18 |     return {
 19 |       exec: vi.fn((sql: string) => {
 20 |         // Simulate table creation
 21 |         if (sql.includes("CREATE TABLE IF NOT EXISTS chats")) {
 22 |           tables["chats"] = tables["chats"] || [];
 23 |         }
 24 |         if (sql.includes("CREATE TABLE IF NOT EXISTS messages")) {
 25 |           tables["messages"] = tables["messages"] || [];
 26 |         }
 27 |       }),
 28 |       query: vi.fn((sql: string) => ({
 29 |         all: vi.fn((chatId: string) => {
 30 |           if (sql.includes("SELECT role, content FROM messages")) {
 31 |             // Return mock chat history
 32 |             return (
 33 |               tables["messages"]
 34 |                 ?.filter((msg) => msg.chat_id === chatId)
 35 |                 .map((msg) => ({ role: msg.role, content: msg.content })) || []
 36 |             );
 37 |           }
 38 |           return [];
 39 |         }),
 40 |       })),
 41 |       prepare: vi.fn((sql: string) => ({
 42 |         run: vi.fn((...params: any[]) => {
 43 |           if (sql.includes("INSERT OR IGNORE INTO chats")) {
 44 |             const chatId = params[0];
 45 |             // Simulate inserting chat if not exists
 46 |             if (!tables["chats"]?.some((chat) => chat.id === chatId)) {
 47 |               tables["chats"] = tables["chats"] || [];
 48 |               tables["chats"].push({ id: chatId, created_at: new Date().toISOString() });
 49 |             }
 50 |           } else if (sql.includes("INSERT INTO messages")) {
 51 |             const [chatId, role, content] = params;
 52 |             // Simulate inserting message
 53 |             tables["messages"] = tables["messages"] || [];
 54 |             tables["messages"].push({
 55 |               id: tables["messages"].length + 1,
 56 |               chat_id: chatId,
 57 |               role: role as string,
 58 |               content: content as string,
 59 |               created_at: new Date().toISOString(),
 60 |             });
 61 |           }
 62 |           return { changes: 1 };
 63 |         }),
 64 |       })),
 65 |     };
 66 |   };
 67 | 
 68 |   describe("Database Initialization", () => {
 69 |     it("should create required tables when initializing database", () => {
 70 |       const mockDb = createMockDatabase();
 71 | 
 72 |       dbModule.initializeDatabase(mockDb as unknown as Database);
 73 | 
 74 |       expect(mockDb.exec).toHaveBeenCalledTimes(2);
 75 |       expect(mockDb.exec).toHaveBeenCalledWith(
 76 |         expect.stringContaining("CREATE TABLE IF NOT EXISTS chats"),
 77 |       );
 78 |       expect(mockDb.exec).toHaveBeenCalledWith(
 79 |         expect.stringContaining("CREATE TABLE IF NOT EXISTS messages"),
 80 |       );
 81 |     });
 82 | 
 83 |     it("should create tables with correct schema", () => {
 84 |       const mockDb = createMockDatabase();
 85 | 
 86 |       dbModule.initializeDatabase(mockDb as unknown as Database);
 87 | 
 88 |       // Verify chats table structure
 89 |       expect(mockDb.exec).toHaveBeenCalledWith(expect.stringContaining("id TEXT PRIMARY KEY"));
 90 |       expect(mockDb.exec).toHaveBeenCalledWith(
 91 |         expect.stringContaining("created_at DATETIME DEFAULT CURRENT_TIMESTAMP"),
 92 |       );
 93 | 
 94 |       // Verify messages table structure
 95 |       expect(mockDb.exec).toHaveBeenCalledWith(expect.stringContaining("chat_id TEXT NOT NULL"));
 96 |       expect(mockDb.exec).toHaveBeenCalledWith(expect.stringContaining("role TEXT NOT NULL"));
 97 |       expect(mockDb.exec).toHaveBeenCalledWith(expect.stringContaining("content TEXT NOT NULL"));
 98 |       expect(mockDb.exec).toHaveBeenCalledWith(
 99 |         expect.stringContaining("FOREIGN KEY (chat_id) REFERENCES chats(id)"),
100 |       );
101 |     });
102 |   });
103 | 
104 |   describe("Chat History Operations", () => {
105 |     it("should retrieve chat history for a given chat ID", () => {
106 |       const mockDb = createMockDatabase();
107 | 
108 |       // Initialize database and add some test data
109 |       dbModule.initializeDatabase(mockDb as unknown as Database);
110 | 
111 |       const chatId = "test-chat-123";
112 |       const testMessages: ChatMessage[] = [
113 |         { role: "user", content: "Hello!" },
114 |         { role: "assistant", content: "Hi there!" },
115 |         { role: "user", content: "How are you?" },
116 |       ];
117 | 
118 |       // Save test messages
119 |       testMessages.forEach((msg) => {
120 |         dbModule.saveChatMessage(mockDb as unknown as Database, chatId, msg);
121 |       });
122 | 
123 |       // Retrieve chat history
124 |       const history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
125 | 
126 |       expect(history).toHaveLength(3);
127 |       expect(history[0]).toEqual(testMessages[0]);
128 |       expect(history[1]).toEqual(testMessages[1]);
129 |       expect(history[2]).toEqual(testMessages[2]);
130 |       expect(mockDb.query).toHaveBeenCalledWith(
131 |         expect.stringContaining("SELECT role, content FROM messages"),
132 |       );
133 |     });
134 | 
135 |     it("should return empty array for non-existent chat ID", () => {
136 |       const mockDb = createMockDatabase();
137 | 
138 |       const history = dbModule.getChatHistory(mockDb as unknown as Database, "non-existent-chat");
139 | 
140 |       expect(history).toEqual([]);
141 |       expect(Array.isArray(history)).toBe(true);
142 |     });
143 | 
144 |     it("should return messages ordered by creation time", () => {
145 |       const mockDb = createMockDatabase();
146 | 
147 |       dbModule.initializeDatabase(mockDb as unknown as Database);
148 | 
149 |       const chatId = "ordered-test-chat";
150 |       const messagesInOrder: ChatMessage[] = [
151 |         { role: "user", content: "First message" },
152 |         { role: "assistant", content: "Second message" },
153 |         { role: "user", content: "Third message" },
154 |       ];
155 | 
156 |       // Save messages in order
157 |       messagesInOrder.forEach((msg) => {
158 |         dbModule.saveChatMessage(mockDb as unknown as Database, chatId, msg);
159 |       });
160 | 
161 |       const history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
162 | 
163 |       expect(history).toHaveLength(3);
164 |       expect(history[0]?.content).toBe("First message");
165 |       expect(history[1]?.content).toBe("Second message");
166 |       expect(history[2]?.content).toBe("Third message");
167 |     });
168 |   });
169 | 
170 |   describe("Chat Message Saving", () => {
171 |     it("should save user message to database", () => {
172 |       const mockDb = createMockDatabase();
173 | 
174 |       dbModule.initializeDatabase(mockDb as unknown as Database);
175 | 
176 |       const chatId = "save-user-test";
177 |       const message: ChatMessage = { role: "user", content: "Test user message" };
178 | 
179 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, message);
180 | 
181 |       expect(mockDb.prepare).toHaveBeenCalledWith(
182 |         expect.stringContaining("INSERT OR IGNORE INTO chats"),
183 |       );
184 |       expect(mockDb.prepare).toHaveBeenCalledWith(expect.stringContaining("INSERT INTO messages"));
185 |     });
186 | 
187 |     it("should save assistant message to database", () => {
188 |       const mockDb = createMockDatabase();
189 | 
190 |       dbModule.initializeDatabase(mockDb as unknown as Database);
191 | 
192 |       const chatId = "save-assistant-test";
193 |       const message: ChatMessage = { role: "assistant", content: "Test assistant response" };
194 | 
195 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, message);
196 | 
197 |       expect(mockDb.prepare).toHaveBeenCalledWith(
198 |         expect.stringContaining("INSERT OR IGNORE INTO chats"),
199 |       );
200 |       expect(mockDb.prepare).toHaveBeenCalledWith(expect.stringContaining("INSERT INTO messages"));
201 |     });
202 | 
203 |     it("should create chat record if it doesn't exist when saving message", () => {
204 |       const mockDb = createMockDatabase();
205 | 
206 |       dbModule.initializeDatabase(mockDb as unknown as Database);
207 | 
208 |       const chatId = "new-chat-test";
209 |       const message: ChatMessage = { role: "user", content: "First message in new chat" };
210 | 
211 |       // Before saving, chat should not exist
212 |       let history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
213 |       expect(history).toEqual([]);
214 | 
215 |       // Save message (should create chat)
216 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, message);
217 | 
218 |       // After saving, chat should exist with the message
219 |       history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
220 |       expect(history).toHaveLength(1);
221 |       expect(history[0]).toEqual(message);
222 |     });
223 | 
224 |     it("should prevent duplicate chat creation", () => {
225 |       const mockDb = createMockDatabase();
226 | 
227 |       dbModule.initializeDatabase(mockDb as unknown as Database);
228 | 
229 |       const chatId = "duplicate-test";
230 |       const message1: ChatMessage = { role: "user", content: "First message" };
231 |       const message2: ChatMessage = { role: "assistant", content: "Response" };
232 | 
233 |       // Save two messages to the same chat
234 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, message1);
235 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, message2);
236 | 
237 |       // Each save is expected to prepare "INSERT OR IGNORE INTO chats" once, so two matching calls
238 |       const prepareCalls = (mockDb.prepare as any).mock.calls;
239 |       const insertOrIgnoreCalls = prepareCalls.filter((call: any) =>
240 |         call[0]?.includes("INSERT OR IGNORE INTO chats"),
241 |       );
242 | 
243 |       expect(insertOrIgnoreCalls).toHaveLength(2);
244 |     });
245 |   });
246 | 
247 |   describe("Edge Cases", () => {
248 |     it("should handle special characters in message content", () => {
249 |       const mockDb = createMockDatabase();
250 | 
251 |       dbModule.initializeDatabase(mockDb as unknown as Database);
252 | 
253 |       const chatId = "special-chars-test";
254 |       const messageWithSpecialChars: ChatMessage = {
255 |         role: "user",
256 |         content: "Message with 'quotes', \"double quotes\", and \n newlines",
257 |       };
258 | 
259 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, messageWithSpecialChars);
260 |       const history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
261 | 
262 |       expect(history).toHaveLength(1);
263 |       expect(history[0]).toEqual(messageWithSpecialChars);
264 |     });
265 | 
266 |     it("should handle empty message content", () => {
267 |       const mockDb = createMockDatabase();
268 | 
269 |       dbModule.initializeDatabase(mockDb as unknown as Database);
270 | 
271 |       const chatId = "empty-content-test";
272 |       const emptyMessage: ChatMessage = { role: "user", content: "" };
273 | 
274 |       dbModule.saveChatMessage(mockDb as unknown as Database, chatId, emptyMessage);
275 |       const history = dbModule.getChatHistory(mockDb as unknown as Database, chatId);
276 | 
277 |       expect(history).toHaveLength(1);
278 |       expect(history[0]).toEqual(emptyMessage);
279 |     });
280 |   });
281 | });
282 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/logging.test.ts:
--------------------------------------------------------------------------------
  1 | import { afterAll, beforeEach, describe, expect, it, vi } from "vitest";
  2 | import type { LogLevel } from "../../utils/logging.js";
  3 | 
  4 | describe("Logging Utilities", () => {
  5 |   // Mock console.error to capture log output
  6 |   const mockConsoleError = vi.spyOn(console, "error").mockImplementation(() => {});
  7 | 
  8 |   beforeEach(() => {
  9 |     vi.clearAllMocks();
 10 |   });
 11 | 
 12 |   afterAll(() => {
 13 |     mockConsoleError.mockRestore();
 14 |   });
 15 | 
 16 |   describe("Core log function", () => {
 17 |     it("should export logging functions", async () => {
 18 |       const logging = await import("../../utils/logging.js");
 19 | 
 20 |       expect(logging.log).toBeDefined();
 21 |       expect(typeof logging.log).toBe("function");
 22 |       expect(logging.logInfo).toBeDefined();
 23 |       expect(typeof logging.logInfo).toBe("function");
 24 |       expect(logging.logWarn).toBeDefined();
 25 |       expect(typeof logging.logWarn).toBe("function");
 26 |       expect(logging.logError).toBeDefined();
 27 |       expect(typeof logging.logError).toBe("function");
 28 |     });
 29 | 
 30 |     it("should define LogLevel type", async () => {
 31 |       // This is a type-only test, so we just verify it compiles
 32 |       const level: LogLevel = "info";
 33 |       expect(["info", "warn", "error"]).toContain(level);
 34 |     });
 35 | 
 36 |     it("should log error messages to console.error", async () => {
 37 |       const { log } = await import("../../utils/logging.js");
 38 | 
 39 |       const testMessage = "Test error message";
 40 |       log("error", testMessage);
 41 | 
 42 |       expect(mockConsoleError).toHaveBeenCalledWith(
 43 |         expect.stringMatching(
 44 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[ERROR\] Test error message$/,
 45 |         ),
 46 |       );
 47 |     });
 48 | 
 49 |     it("should log error messages with metadata to console.error", async () => {
 50 |       const { log } = await import("../../utils/logging.js");
 51 | 
 52 |       const testMessage = "Test error with metadata";
 53 |       const testMeta = { userId: "123", action: "login" };
 54 |       log("error", testMessage, testMeta);
 55 | 
 56 |       expect(mockConsoleError).toHaveBeenCalledWith(
 57 |         expect.stringMatching(
 58 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[ERROR\] Test error with metadata$/,
 59 |         ),
 60 |         testMeta,
 61 |       );
 62 |     });
 63 | 
 64 |     it("should log info messages to console.error", async () => {
 65 |       const { log } = await import("../../utils/logging.js");
 66 | 
 67 |       const testMessage = "Test info message";
 68 |       log("info", testMessage);
 69 | 
 70 |       expect(mockConsoleError).toHaveBeenCalledWith(
 71 |         expect.stringMatching(
 72 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Test info message$/,
 73 |         ),
 74 |       );
 75 |     });
 76 | 
 77 |     it("should log info messages with metadata to console.error", async () => {
 78 |       const { log } = await import("../../utils/logging.js");
 79 | 
 80 |       const testMessage = "Test info with metadata";
 81 |       const testMeta = { processId: "abc", status: "running" };
 82 |       log("info", testMessage, testMeta);
 83 | 
 84 |       expect(mockConsoleError).toHaveBeenCalledWith(
 85 |         expect.stringMatching(
 86 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Test info with metadata$/,
 87 |         ),
 88 |         testMeta,
 89 |       );
 90 |     });
 91 | 
 92 |     it("should filter out most warn messages", async () => {
 93 |       const { log } = await import("../../utils/logging.js");
 94 | 
 95 |       const testMessage = "Regular warning message";
 96 |       log("warn", testMessage);
 97 | 
 98 |       expect(mockConsoleError).not.toHaveBeenCalled();
 99 |     });
100 | 
101 |     it("should log warn messages containing 'CAPTCHA'", async () => {
102 |       const { log } = await import("../../utils/logging.js");
103 | 
104 |       const testMessage = "CAPTCHA detected during operation";
105 |       log("warn", testMessage);
106 | 
107 |       expect(mockConsoleError).toHaveBeenCalledWith(
108 |         expect.stringMatching(
109 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[WARN\] CAPTCHA detected during operation$/,
110 |         ),
111 |       );
112 |     });
113 | 
114 |     it("should log warn messages containing 'failed'", async () => {
115 |       const { log } = await import("../../utils/logging.js");
116 | 
117 |       const testMessage = "Operation failed unexpectedly";
118 |       log("warn", testMessage);
119 | 
120 |       expect(mockConsoleError).toHaveBeenCalledWith(
121 |         expect.stringMatching(
122 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[WARN\] Operation failed unexpectedly$/,
123 |         ),
124 |       );
125 |     });
126 | 
127 |     it("should log warn messages with metadata when they contain 'CAPTCHA' or 'failed'", async () => {
128 |       const { log } = await import("../../utils/logging.js");
129 | 
130 |       const testMessage = "CAPTCHA challenge failed";
131 |       const testMeta = { url: "https://example.com", attempt: 3 };
132 |       log("warn", testMessage, testMeta);
133 | 
134 |       expect(mockConsoleError).toHaveBeenCalledWith(
135 |         expect.stringMatching(
136 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[WARN\] CAPTCHA challenge failed$/,
137 |         ),
138 |         testMeta,
139 |       );
140 |     });
141 |   });
142 | 
143 |   describe("Convenience logging functions", () => {
144 |     it("should log info messages using logInfo helper", async () => {
145 |       const { logInfo } = await import("../../utils/logging.js");
146 | 
147 |       const testMessage = "Test info via helper";
148 |       logInfo(testMessage);
149 | 
150 |       expect(mockConsoleError).toHaveBeenCalledWith(
151 |         expect.stringMatching(
152 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Test info via helper$/,
153 |         ),
154 |       );
155 |     });
156 | 
157 |     it("should log warn messages using logWarn helper", async () => {
158 |       const { logWarn } = await import("../../utils/logging.js");
159 | 
160 |       const testMessage = "CAPTCHA warning via helper";
161 |       logWarn(testMessage);
162 | 
163 |       expect(mockConsoleError).toHaveBeenCalledWith(
164 |         expect.stringMatching(
165 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[WARN\] CAPTCHA warning via helper$/,
166 |         ),
167 |       );
168 |     });
169 | 
170 |     it("should log error messages using logError helper", async () => {
171 |       const { logError } = await import("../../utils/logging.js");
172 | 
173 |       const testMessage = "Test error via helper";
174 |       logError(testMessage);
175 | 
176 |       expect(mockConsoleError).toHaveBeenCalledWith(
177 |         expect.stringMatching(
178 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[ERROR\] Test error via helper$/,
179 |         ),
180 |       );
181 |     });
182 | 
183 |     it("should support metadata in convenience functions", async () => {
184 |       const { logInfo, logWarn, logError } = await import("../../utils/logging.js");
185 | 
186 |       const testMeta = { component: "test-suite", version: "1.0" };
187 | 
188 |       logInfo("Info with metadata", testMeta);
189 |       logWarn("CAPTCHA with metadata", testMeta);
190 |       logError("Error with metadata", testMeta);
191 | 
192 |       expect(mockConsoleError).toHaveBeenNthCalledWith(
193 |         1,
194 |         expect.stringMatching(
195 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Info with metadata$/,
196 |         ),
197 |         testMeta,
198 |       );
199 | 
200 |       expect(mockConsoleError).toHaveBeenNthCalledWith(
201 |         2,
202 |         expect.stringMatching(
203 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[WARN\] CAPTCHA with metadata$/,
204 |         ),
205 |         testMeta,
206 |       );
207 | 
208 |       expect(mockConsoleError).toHaveBeenNthCalledWith(
209 |         3,
210 |         expect.stringMatching(
211 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[ERROR\] Error with metadata$/,
212 |         ),
213 |         testMeta,
214 |       );
215 |     });
216 |   });
217 | 
218 |   describe("Timestamp formatting", () => {
219 |     it("should include ISO formatted timestamps in log messages", async () => {
220 |       const { log } = await import("../../utils/logging.js");
221 | 
222 |       const before = new Date().toISOString();
223 |       log("info", "Timestamp test");
224 |       const after = new Date().toISOString();
225 | 
226 |       expect(mockConsoleError).toHaveBeenCalledWith(
227 |         expect.stringMatching(
228 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Timestamp test$/,
229 |         ),
230 |       );
231 | 
232 |       // Verify the logged timestamp falls between `before` and `after`
233 |       const callArg = mockConsoleError.mock.calls[0]?.[0] as string;
234 |       const timestampStr = callArg?.match(/\[(.*?)\]/)?.[1];
235 |       // Assert the capture exists so the range check can never be skipped silently
236 |       expect(timestampStr).toBeDefined();
237 |       const timestamp = new Date(timestampStr ?? "").getTime();
238 |       expect(timestamp).toBeGreaterThanOrEqual(new Date(before).getTime());
239 |       expect(timestamp).toBeLessThanOrEqual(new Date(after).getTime());
240 |     });
241 | 
242 |     it("should uppercase log levels in output", async () => {
243 |       const { log } = await import("../../utils/logging.js");
244 | 
245 |       log("info", "Level test");
246 |       log("warn", "CAPTCHA level test");
247 |       log("error", "Level test");
248 | 
249 |       expect(mockConsoleError).toHaveBeenNthCalledWith(1, expect.stringContaining("[INFO]"));
250 | 
251 |       expect(mockConsoleError).toHaveBeenNthCalledWith(2, expect.stringContaining("[WARN]"));
252 | 
253 |       expect(mockConsoleError).toHaveBeenNthCalledWith(3, expect.stringContaining("[ERROR]"));
254 |     });
255 |   });
256 | 
257 |   describe("Edge cases", () => {
258 |     it("should handle empty messages", async () => {
259 |       const { log } = await import("../../utils/logging.js");
260 | 
261 |       log("info", "");
262 | 
263 |       expect(mockConsoleError).toHaveBeenCalledWith(
264 |         expect.stringMatching(/^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] $/),
265 |       );
266 |     });
267 | 
268 |     it("should handle special characters in messages", async () => {
269 |       const { log } = await import("../../utils/logging.js");
270 | 
271 |       const specialMessage = "Message with 'quotes', \"double quotes\", \n newlines, and \t tabs";
272 |       log("error", specialMessage);
273 | 
274 |       expect(mockConsoleError).toHaveBeenCalledWith(
275 |         expect.stringMatching(
276 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[ERROR\] Message with 'quotes', "double quotes", \n newlines, and \t tabs$/,
277 |         ),
278 |       );
279 |     });
280 | 
281 |     it("should handle empty metadata objects", async () => {
282 |       const { log } = await import("../../utils/logging.js");
283 | 
284 |       log("info", "Empty metadata test", {});
285 | 
286 |       expect(mockConsoleError).toHaveBeenCalledWith(
287 |         expect.stringMatching(
288 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Empty metadata test$/,
289 |         ),
290 |       );
291 |     });
292 | 
293 |     it("should handle undefined metadata", async () => {
294 |       const { log } = await import("../../utils/logging.js");
295 | 
296 |       log("info", "Undefined metadata test", undefined);
297 | 
298 |       expect(mockConsoleError).toHaveBeenCalledWith(
299 |         expect.stringMatching(
300 |           /^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\] \[INFO\] Undefined metadata test$/,
301 |         ),
302 |       );
303 |     });
304 |   });
305 | });
306 | 


--------------------------------------------------------------------------------
/src/schema/toolSchemas.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * MCP Tool Schema Definitions
  3 |  * Comprehensive schemas for all available tools including descriptions, input/output schemas, examples, and metadata
  4 |  */
  5 | 
  6 | export const TOOL_SCHEMAS = [
  7 |   {
  8 |     name: "chat_perplexity",
  9 |     description:
 10 |       "Automatically call this tool for interactive, conversational queries. This tool leverages Perplexitys web search capabilities to provide real-time information and maintains conversation history using an optional chat ID for contextual follow-ups.",
 11 |     category: "Conversation",
 12 |     keywords: ["chat", "conversation", "dialog", "discussion", "advice", "brainstorm", "debug"],
 13 |     use_cases: [
 14 |       "Continuing multi-turn conversations",
 15 |       "Context-aware question answering",
 16 |       "Follow-up questions",
 17 |     ],
 18 |     inputSchema: {
 19 |       type: "object",
 20 |       properties: {
 21 |         message: {
 22 |           type: "string",
 23 |           description: "The message to send to Perplexity AI for web search",
 24 |           examples: [
 25 |             "Explain quantum computing",
 26 |             "Continue our previous discussion about AI safety",
 27 |           ],
 28 |         },
 29 |         chat_id: {
 30 |           type: "string",
 31 |           description:
 32 |             "Optional: ID of an existing chat to continue. If not provided, a new chat will be created.",
 33 |           examples: ["123e4567-e89b-12d3-a456-426614174000"],
 34 |         },
 35 |       },
 36 |       required: ["message"],
 37 |     },
 38 |     examples: [
 39 |       {
 40 |         description: "Simple question",
 41 |         input: { message: "Explain quantum computing basics" },
 42 |         output: {
 43 |           chat_id: "new-chat-id",
 44 |           response: "Quantum computing uses qubits that can exist in superposition...",
 45 |         },
 46 |       },
 47 |       {
 48 |         description: "Continuing conversation",
 49 |         input: {
 50 |           message: "How does that compare to classical computing?",
 51 |           chat_id: "existing-chat-id",
 52 |         },
 53 |         output: {
 54 |           chat_id: "existing-chat-id",
 55 |           response: "Classical computers use bits that are either 0 or 1, while quantum...",
 56 |         },
 57 |       },
 58 |     ],
 59 |     related_tools: ["search", "get_documentation"],
 60 |   },
 61 |   {
 62 |     name: "extract_url_content",
 63 |     description:
 64 |       "Uses browser automation (Puppeteer) and Mozilla's Readability library to extract the main article text content from a given URL. Handles dynamic JavaScript rendering and includes fallback logic. For GitHub repository URLs, it attempts to fetch structured content via gitingest.com. Performs a pre-check for non-HTML content types and checks HTTP status after navigation. Ideal for getting clean text from articles/blog posts. **Note: May struggle to isolate only core content on complex homepages or dashboards, potentially including UI elements.**",
 65 |     category: "Information Extraction",
 66 |     keywords: [
 67 |       "extract",
 68 |       "url",
 69 |       "website",
 70 |       "content",
 71 |       "scrape",
 72 |       "summarize",
 73 |       "webpage",
 74 |       "fetch",
 75 |       "readability",
 76 |       "article",
 77 |       "dom",
 78 |       "puppeteer",
 79 |       "github",
 80 |       "gitingest",
 81 |       "repository",
 82 |     ],
 83 |     use_cases: [
 84 |       "Getting the main text of a news article or blog post.",
 85 |       "Summarizing web page content.",
 86 |       "Extracting documentation text.",
 87 |       "Providing website context to other models.",
 88 |     ],
 89 |     inputSchema: {
 90 |       type: "object",
 91 |       properties: {
 92 |         url: {
 93 |           type: "string",
 94 |           description: "The URL of the website to extract content from.",
 95 |           examples: ["https://www.example.com/article"],
 96 |         },
 97 |         depth: {
 98 |           type: "number",
 99 |           description:
100 |             "Optional: Maximum depth for recursive link exploration (1-5). Default is 1 (no recursion).",
101 |           minimum: 1,
102 |           maximum: 5,
103 |           default: 1,
104 |           examples: [1, 3],
105 |         },
106 |       },
107 |       required: ["url"],
108 |     },
109 |     examples: [
110 |       {
111 |         description: "Successful extraction from an article",
112 |         input: { url: "https://example-article-url.com" },
113 |         output: {
114 |           status: "Success",
115 |           rootUrl: "https://example-article-url.com",
116 |           explorationDepth: 1,
117 |           pagesExplored: 1,
118 |           content: [
119 |             {
120 |               url: "https://example-article-url.com",
121 |               title: "Example Article Title",
122 |               textContent: "The main body text of the article...",
123 |             },
124 |           ],
125 |         },
126 |       },
127 |     ],
128 |     related_tools: ["search", "get_documentation"],
129 |   },
130 |   {
131 |     name: "get_documentation",
132 |     description:
133 |       'Automatically call this tool when working with unfamiliar APIs/libraries, needing usage examples, or checking version specifics as this can access web. Example: When adding a payment gateway, ask "Get Stripe API documentation for creating charges".',
134 |     category: "Technical Reference",
135 |     keywords: ["docs", "documentation", "api", "reference", "examples", "usage", "version"],
136 |     use_cases: ["Learning new technologies", "API integration", "Troubleshooting code"],
137 |     inputSchema: {
138 |       type: "object",
139 |       properties: {
140 |         query: {
141 |           type: "string",
142 |           description: "The technology, library, or API to get documentation for",
143 |           examples: ["React hooks", "Python pandas", "REST API best practices"],
144 |         },
145 |         context: {
146 |           type: "string",
147 |           description: "Additional context or specific aspects to focus on",
148 |           examples: ["focus on performance optimization", "include TypeScript examples"],
149 |         },
150 |       },
151 |       required: ["query"],
152 |     },
153 |     examples: [
154 |       {
155 |         description: "Basic documentation request",
156 |         input: { query: "React useEffect hook" },
157 |         output: {
158 |           response: "The useEffect hook lets you perform side effects in function components...",
159 |         },
160 |       },
161 |     ],
162 |     related_tools: ["search", "check_deprecated_code"],
163 |   },
164 |   {
165 |     name: "find_apis",
166 |     description:
167 |       'Automatically call this tool when needing external services or real time current data (like API info, latest versions, etc.) from web. Compares options based on requirements. Example: When building a shopping site, ask "Find product image APIs with free tiers".',
168 |     category: "API Discovery",
169 |     keywords: ["api", "integration", "services", "endpoints", "sdk", "data", "external"],
170 |     use_cases: [
171 |       "Finding APIs for specific functionality",
172 |       "Comparing API alternatives",
173 |       "Evaluating API suitability",
174 |     ],
175 |     inputSchema: {
176 |       type: "object",
177 |       properties: {
178 |         requirement: {
179 |           type: "string",
180 |           description: "The functionality or requirement you are looking to fulfill",
181 |           examples: ["image recognition", "payment processing", "geolocation services"],
182 |         },
183 |         context: {
184 |           type: "string",
185 |           description: "Additional context about the project or specific needs",
186 |           examples: ["prefer free tier options", "must support Python SDK"],
187 |         },
188 |       },
189 |       required: ["requirement"],
190 |     },
191 |     examples: [
192 |       {
193 |         description: "Finding payment APIs",
194 |         input: {
195 |           requirement: "payment processing",
196 |           context: "needs Stripe alternative",
197 |         },
198 |         output: {
199 |           response: "PayPal offers global payment processing with 2.9% + $0.30 per transaction...",
200 |         },
201 |       },
202 |     ],
203 |     related_tools: ["get_documentation", "search"],
204 |   },
205 |   {
206 |     name: "check_deprecated_code",
207 |     description:
208 |       "Automatically call this tool when reviewing legacy code, planning upgrades, or encountering warnings with real time web access. Helps identify technical debt. Example: During code reviews or before upgrading dependencies.",
209 |     category: "Code Analysis",
210 |     keywords: ["deprecation", "migration", "upgrade", "compatibility", "linting", "legacy", "debt"],
211 |     use_cases: [
212 |       "Preparing for technology upgrades",
213 |       "Maintaining backward compatibility",
214 |       "Identifying technical debt",
215 |     ],
216 |     inputSchema: {
217 |       type: "object",
218 |       properties: {
219 |         code: {
220 |           type: "string",
221 |           description: "The code snippet or dependency to check",
222 |           examples: ["componentWillMount()", "var instead of let/const"],
223 |         },
224 |         technology: {
225 |           type: "string",
226 |           description: 'The technology or framework context (e.g., "React", "Node.js")',
227 |           examples: ["React 16", "Python 2.7", "Node.js 12"],
228 |         },
229 |       },
230 |       required: ["code"],
231 |     },
232 |     examples: [
233 |       {
234 |         description: "React lifecycle method deprecation",
235 |         input: {
236 |           code: "componentWillMount() {\n  // initialization code\n}",
237 |           technology: "React",
238 |         },
239 |         output: {
240 |           response:
241 |             "componentWillMount is deprecated in React 17+. Use constructor or componentDidMount instead...",
242 |         },
243 |       },
244 |     ],
245 |     related_tools: ["get_documentation", "search"],
246 |   },
247 |   {
248 |     name: "search",
249 |     description:
250 |       "Performs a web search using Perplexity AI based on the provided query and desired detail level. Useful for general knowledge questions, finding information, or getting different perspectives.",
251 |     category: "Web Search",
252 |     keywords: ["search", "web", "internet", "query", "find", "information", "lookup", "perplexity"],
253 |     use_cases: [
254 |       "Answering general knowledge questions.",
255 |       "Finding specific information online.",
256 |       "Getting quick summaries or detailed explanations.",
257 |       "Researching topics.",
258 |     ],
259 |     inputSchema: {
260 |       type: "object",
261 |       properties: {
262 |         query: {
263 |           type: "string",
264 |           description: "The search query or question to ask Perplexity.",
265 |           examples: ["What is the capital of France?", "Explain black holes"],
266 |         },
267 |         detail_level: {
268 |           type: "string",
269 |           enum: ["brief", "normal", "detailed"],
270 |           description: "Optional: Controls the level of detail in the response (default: normal).",
271 |           examples: ["brief", "detailed"],
272 |         },
273 |         stream: {
274 |           type: "boolean",
275 |           description:
276 |             "Optional: Enable streaming response for large documentation queries (default: false).",
277 |           examples: [true, false],
278 |         },
279 |       },
280 |       required: ["query"],
281 |     },
282 |     examples: [
283 |       {
284 |         description: "Simple search query",
285 |         input: { query: "What is the weather in London?" },
286 |         output: { response: "The weather in London is currently..." },
287 |       },
288 |       {
289 |         description: "Detailed search query",
290 |         input: { query: "Explain the theory of relativity", detail_level: "detailed" },
291 |         output: {
292 |           response:
293 |             "Albert Einstein's theory of relativity includes Special Relativity and General Relativity...",
294 |         },
295 |       },
296 |     ],
297 |     related_tools: ["chat_perplexity", "get_documentation", "find_apis"],
298 |   },
299 | ] as const;
300 | 


--------------------------------------------------------------------------------
/src/__tests__/integration/server.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it, vi } from "vitest";
  2 | import { PerplexityServer } from "../../server/PerplexityServer.js";
  3 | import type { IBrowserManager, IDatabaseManager, ISearchEngine } from "../../types/index.js";
  4 | 
  5 | // Mock the modules to avoid actual browser initialization and database connections
  6 | vi.mock("../../server/modules/BrowserManager.js", () => {
  7 |   return {
  8 |     BrowserManager: vi.fn().mockImplementation(() => ({
  9 |       initialize: vi.fn().mockResolvedValue(undefined),
 10 |       isReady: vi.fn().mockReturnValue(true),
 11 |       cleanup: vi.fn().mockResolvedValue(undefined),
 12 |       getPuppeteerContext: vi.fn().mockReturnValue({
 13 |         browser: null,
 14 |         page: null,
 15 |         isInitializing: false,
 16 |         searchInputSelector: 'textarea[placeholder*="Ask"]',
 17 |         lastSearchTime: 0,
 18 |         idleTimeout: null,
 19 |         operationCount: 0,
 20 |         log: vi.fn(),
 21 |         setBrowser: vi.fn(),
 22 |         setPage: vi.fn(),
 23 |         setIsInitializing: vi.fn(),
 24 |         setSearchInputSelector: vi.fn(),
 25 |         setIdleTimeout: vi.fn(),
 26 |         incrementOperationCount: vi.fn(),
 27 |         determineRecoveryLevel: vi.fn(),
 28 |         IDLE_TIMEOUT_MS: 300000,
 29 |       }),
 30 |     })),
 31 |   };
 32 | });
 33 | 
 34 | vi.mock("../../server/modules/DatabaseManager.js", () => {
 35 |   return {
 36 |     DatabaseManager: vi.fn().mockImplementation(() => ({
 37 |       initialize: vi.fn(),
 38 |       close: vi.fn(),
 39 |       getChatHistory: vi.fn().mockReturnValue([]),
 40 |       saveChatMessage: vi.fn(),
 41 |       isInitialized: vi.fn().mockReturnValue(true),
 42 |     })),
 43 |   };
 44 | });
 45 | 
 46 | vi.mock("../../server/modules/SearchEngine.js", () => {
 47 |   return {
 48 |     SearchEngine: vi.fn().mockImplementation(() => ({
 49 |       performSearch: vi.fn().mockResolvedValue("Mock search result"),
 50 |     })),
 51 |   };
 52 | });
 53 | 
 54 | // Mock logging
 55 | vi.mock("../../utils/logging.js", () => ({
 56 |   logInfo: vi.fn(),
 57 |   logWarn: vi.fn(),
 58 |   logError: vi.fn(),
 59 | }));
 60 | 
 61 | describe("MCP Server Integration", () => {
 62 |   describe("Server initialization", () => {
 63 |     // A default-constructed server must hand back all three collaborators.
 64 |     it("should initialize server components successfully", () => {
 65 |       const instance = new PerplexityServer();
 66 | 
 67 |       expect(instance).toBeDefined();
 68 |       expect(instance.getBrowserManager()).toBeDefined();
 69 |       expect(instance.getSearchEngine()).toBeDefined();
 70 |       expect(instance.getDatabaseManager()).toBeDefined();
 71 |     });
 72 | 
 73 |     // Hand-written doubles are injected; only the database double is asserted on.
 74 |     it("should initialize server with custom dependencies", () => {
 75 |       const browserDouble: IBrowserManager = {
 76 |         initialize: vi.fn().mockResolvedValue(undefined),
 77 |         navigateToPerplexity: vi.fn().mockResolvedValue(undefined),
 78 |         waitForSearchInput: vi.fn().mockResolvedValue("textarea"),
 79 |         checkForCaptcha: vi.fn().mockResolvedValue(false),
 80 |         performRecovery: vi.fn().mockResolvedValue(undefined),
 81 |         isReady: vi.fn().mockReturnValue(true),
 82 |         cleanup: vi.fn().mockResolvedValue(undefined),
 83 |         getPage: vi.fn().mockReturnValue(null),
 84 |         getBrowser: vi.fn().mockReturnValue(null),
 85 |         resetIdleTimeout: vi.fn(),
 86 |         getPuppeteerContext: vi.fn().mockReturnValue({}),
 87 |       };
 88 | 
 89 |       const searchDouble: ISearchEngine = {
 90 |         performSearch: vi.fn().mockResolvedValue("Custom search result"),
 91 |       };
 92 | 
 93 |       const databaseDouble: IDatabaseManager = {
 94 |         initialize: vi.fn(),
 95 |         close: vi.fn(),
 96 |         getChatHistory: vi.fn().mockReturnValue([]),
 97 |         saveChatMessage: vi.fn(),
 98 |         isInitialized: vi.fn().mockReturnValue(true),
 99 |       };
100 | 
101 |       const instance = new PerplexityServer({
102 |         browserManager: browserDouble,
103 |         searchEngine: searchDouble,
104 |         databaseManager: databaseDouble,
105 |       });
106 | 
107 |       expect(instance).toBeDefined();
108 |       expect(databaseDouble.initialize).toHaveBeenCalled();
109 |     });
110 | 
111 |     // Relies on DatabaseManager being mocked elsewhere in this file, which is
112 |     // what lets initialize() be inspected as a spy after construction.
113 |     it("should initialize database during server startup", () => {
114 |       const instance = new PerplexityServer();
115 |       const { initialize } = instance.getDatabaseManager();
116 | 
117 |       expect(initialize).toHaveBeenCalled();
118 |     });
119 |   });
120 | 
121 |   describe("Tool registration", () => {
122 |     // The tool handlers cannot be reached directly from a server instance, so
123 |     // these tests confirm that construction succeeds and otherwise only
124 |     // inspect a locally declared list of the expected tool names.
125 |     it("should register all required tools", () => {
126 |       const instance = new PerplexityServer();
127 | 
128 |       expect(instance).toBeDefined();
129 | 
130 |       const expectedToolNames = [
131 |         "chat_perplexity",
132 |         "search",
133 |         "extract_url_content",
134 |         "get_documentation",
135 |         "find_apis",
136 |         "check_deprecated_code",
137 |       ];
138 | 
139 |       // Size check on the local list only.
140 |       expect(expectedToolNames).toHaveLength(6);
141 |     });
142 | 
143 |     it("should verify all 6 tools are properly registered", () => {
144 |       const instance = new PerplexityServer();
145 | 
146 |       expect(instance).toBeDefined();
147 | 
148 |       const expectedToolNames = [
149 |         "chat_perplexity",
150 |         "search",
151 |         "extract_url_content",
152 |         "get_documentation",
153 |         "find_apis",
154 |         "check_deprecated_code",
155 |       ];
156 | 
157 |       // NOTE(review): every name is looked up in the same list it was taken
158 |       // from, so this loop does not consult the server at all.
159 |       expectedToolNames.forEach((toolName) => {
160 |         expect(expectedToolNames).toContain(toolName);
161 |       });
162 |     });
163 | 
164 |     it("should handle dynamic tool handler registration", () => {
165 |       // Handler setup is said to run inside the constructor (not verifiable
166 |       // from this test), so successful construction is the only thing that
167 |       // can be observed here.
168 |       const instance = new PerplexityServer();
169 | 
170 |       expect(instance).toBeDefined();
171 |     });
172 |   });
173 | 
174 |   describe("End-to-end workflows", () => {
175 |     it("should handle basic search workflow", async () => {
176 |       // Stub a reply, run one query, and expect the stubbed reply to come back.
177 |       const query = "test query";
178 |       const cannedReply = "Test search result";
179 |       const engine = new PerplexityServer().getSearchEngine();
180 |       vi.mocked(engine.performSearch).mockResolvedValue(cannedReply);
181 | 
182 |       const outcome = await engine.performSearch(query);
183 | 
184 |       expect(outcome).toBe(cannedReply);
185 |       expect(engine.performSearch).toHaveBeenCalledWith(query);
186 |     });
187 | 
188 |     it("should handle complete chat flow from request to response", async () => {
189 |       // Same round trip, this time with a conversational prompt as the query.
190 |       const prompt = "Hello, how are you?";
191 |       const cannedReply = "Chat response";
192 |       const engine = new PerplexityServer().getSearchEngine();
193 |       vi.mocked(engine.performSearch).mockResolvedValue(cannedReply);
194 | 
195 |       const outcome = await engine.performSearch(prompt);
196 | 
197 |       expect(outcome).toBe(cannedReply);
198 |       expect(engine.performSearch).toHaveBeenCalledWith(prompt);
199 |     });
200 | 
201 |     it("should handle complete search flow with different query types", async () => {
202 |       const engine = new PerplexityServer().getSearchEngine();
203 |       const performSearch = vi.mocked(engine.performSearch);
204 |       const sampleQueries = [
205 |         "What is TypeScript?",
206 |         "How to use React hooks?",
207 |         "Explain quantum computing",
208 |       ];
209 | 
210 |       // Queue one canned reply per query and check that it comes straight back.
211 |       for (const sampleQuery of sampleQueries) {
212 |         const expectedReply = `Result for: ${sampleQuery}`;
213 |         performSearch.mockResolvedValueOnce(expectedReply);
214 | 
215 |         const outcome = await engine.performSearch(sampleQuery);
216 |         expect(outcome).toBe(expectedReply);
217 |         expect(engine.performSearch).toHaveBeenCalledWith(sampleQuery);
218 |       }
219 |     });
220 | 
221 |     it("should handle complete content extraction flow with various URLs", async () => {
222 |       const browser = new PerplexityServer().getBrowserManager();
223 | 
224 |       // The manager and its readiness probe must both exist.
225 |       expect(browser).toBeDefined();
226 |       expect(browser.isReady).toBeDefined();
227 | 
228 |       // Force the probe to report ready, then read it back.
229 |       const readinessProbe = vi.mocked(browser.isReady);
230 |       readinessProbe.mockReturnValue(true);
231 | 
232 |       expect(browser.isReady()).toBe(true);
233 |     });
234 | 
235 |     it("should handle documentation lookup workflow", async () => {
236 |       // The query string pairs a topic with extra context. The stubbed
237 |       // engine answers with a fixed documentation string, and the test
238 |       // checks both the returned value and the recorded argument.
239 |       const query = "Documentation for React hooks: focus on performance";
240 |       const cannedReply = "Documentation for React hooks";
241 | 
242 |       const server = new PerplexityServer();
243 |       const engine = server.getSearchEngine();
244 |       vi.mocked(engine.performSearch).mockResolvedValue(cannedReply);
245 | 
246 |       const outcome = await engine.performSearch(query);
247 | 
248 |       expect(outcome).toBe(cannedReply);
249 |       expect(engine.performSearch).toHaveBeenCalledWith(query);
250 |     });
251 | 
252 |     it("should handle API discovery workflow", async () => {
253 |       // The query string pairs an API need with a stated preference; the
254 |       // stubbed engine answers with a fixed string.
255 |       const query = "Find APIs for image recognition: prefer free tier options";
256 |       const cannedReply = "APIs for image recognition";
257 | 
258 |       const server = new PerplexityServer();
259 |       const engine = server.getSearchEngine();
260 |       const performSearch = vi.mocked(engine.performSearch);
261 |       performSearch.mockResolvedValue(cannedReply);
262 | 
263 |       const outcome = await engine.performSearch(query);
264 | 
265 |       expect(outcome).toBe(cannedReply);
266 |       expect(engine.performSearch).toHaveBeenCalledWith(query);
267 |     });
268 | 
269 |     it("should handle deprecated code checking workflow", async () => {
270 |       // The query string embeds a code snippet; the stubbed engine answers
271 |       // with a fixed deprecation verdict.
272 |       const query = "Check if this code is deprecated: componentWillMount()";
273 |       const cannedReply = "componentWillMount is deprecated";
274 | 
275 |       const server = new PerplexityServer();
276 |       const engine = server.getSearchEngine();
277 |       const performSearch = vi.mocked(engine.performSearch);
278 |       performSearch.mockResolvedValue(cannedReply);
279 | 
280 |       const outcome = await engine.performSearch(query);
281 | 
282 |       expect(outcome).toBe(cannedReply);
283 |       expect(engine.performSearch).toHaveBeenCalledWith(query);
284 |     });
285 |   });
286 | 
287 |   describe("Error scenario testing", () => {
288 |     it("should handle timeout handling in integrated environment", async () => {
289 |       // A rejecting stub stands in for a search that timed out.
290 |       const engine = new PerplexityServer().getSearchEngine();
291 |       const timeoutError = new Error("Search timeout");
292 |       vi.mocked(engine.performSearch).mockRejectedValue(timeoutError);
293 | 
294 |       const pending = engine.performSearch("slow query");
295 |       await expect(pending).rejects.toThrow("Search timeout");
296 |     });
297 | 
298 |     it("should handle malformed request handling", async () => {
299 |       // An empty string plays the role of the malformed input.
300 |       const emptyQuery = "";
301 |       const cannedReply = "Empty query response";
302 |       const engine = new PerplexityServer().getSearchEngine();
303 |       vi.mocked(engine.performSearch).mockResolvedValue(cannedReply);
304 | 
305 |       const outcome = await engine.performSearch(emptyQuery);
306 | 
307 |       expect(outcome).toBe(cannedReply);
308 |       expect(engine.performSearch).toHaveBeenCalledWith(emptyQuery);
309 |     });
310 | 
311 |     it("should handle recovery procedures in integrated environment", async () => {
312 |       const browser = new PerplexityServer().getBrowserManager();
313 | 
314 |       // cleanup must be present on the manager...
315 |       expect(browser.cleanup).toBeDefined();
316 | 
317 |       // ...and, once stubbed to succeed, resolve without a value.
318 |       const cleanupSpy = vi.mocked(browser.cleanup);
319 |       cleanupSpy.mockResolvedValue(undefined);
320 | 
321 |       await expect(browser.cleanup()).resolves.toBeUndefined();
322 |     });
323 |   });
324 | });
325 | 


--------------------------------------------------------------------------------
/src/server/modules/SearchEngine.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * SearchEngine - Handles search operations and answer extraction
  3 |  * Focused, testable module for Perplexity search functionality
  4 |  */
  5 | import type { Page } from "puppeteer";
  6 | import type { IBrowserManager, ISearchEngine } from "../../types/index.js";
  7 | import { logError, logInfo, logWarn } from "../../utils/logging.js";
  8 | import { retryOperation } from "../../utils/puppeteer.js";
  9 | import { CONFIG } from "../config.js";
 10 | 
 11 | export class SearchEngine implements ISearchEngine {
 12 |   constructor(private readonly browserManager: IBrowserManager) {} // all browser/page access goes through this injected manager
 13 | 
 14 |   /**
 15 |    * Runs `query` on Perplexity and resolves with the extracted answer text.
 16 |    *
 17 |    * Failures inside the search are logged and converted into an explanatory
 18 |    * message instead of a rejection. Detached-frame, timeout and navigation
 19 |    * errors additionally trigger a best-effort browser recovery.
 20 |    *
 21 |    * @param query - Text typed into the Perplexity search input.
 22 |    * @returns The answer text, or a user-facing description of the failure.
 23 |    */
 24 |   async performSearch(query: string): Promise<string> {
 25 |     // Watchdog only: it starts a recovery once the time budget has elapsed but
 26 |     // does not cancel the search that is still in flight below.
 27 |     const operationTimeout = setTimeout(() => {
 28 |       logError("Global operation timeout reached, initiating recovery...");
 29 |       this.browserManager.performRecovery().catch((err: unknown) => {
 30 |         logError("Recovery after timeout failed:", {
 31 |           error: err instanceof Error ? err.message : String(err),
 32 |         });
 33 |       });
 34 |     }, CONFIG.PAGE_TIMEOUT - CONFIG.MCP_TIMEOUT_BUFFER);
 35 | 
 36 |     try {
 37 |       if (!this.browserManager.isReady()) {
 38 |         logInfo("Browser not ready, initializing...");
 39 |         await this.browserManager.initialize();
 40 |       }
 41 |       this.browserManager.resetIdleTimeout();
 42 | 
 43 |       const ctx = this.browserManager.getPuppeteerContext();
 44 |       return await retryOperation(ctx, async () => {
 45 |         logInfo(`Navigating to Perplexity for query: "${query.substring(0, 30)}${query.length > 30 ? '...' : ''}"`);
 46 |         await this.browserManager.navigateToPerplexity();
 47 | 
 48 |         const page = this.browserManager.getPage();
 49 |         if (!page || page.mainFrame().isDetached()) {
 50 |           logError("Main frame is detached, will retry with new browser instance");
 51 |           throw new Error("Main frame is detached");
 52 |         }
 53 | 
 54 |         logInfo("Waiting for search input...");
 55 |         const selector = await this.browserManager.waitForSearchInput();
 56 |         if (!selector) {
 57 |           logError("Search input not found, taking screenshot for debugging");
 58 |           await page.screenshot({ path: "debug_search_input_not_found.png", fullPage: true });
 59 |           throw new Error("Search input not found");
 60 |         }
 61 |         logInfo(`Found search input with selector: ${selector}`);
 62 | 
 63 |         await this.executeSearch(page, selector, query);
 64 |         return await this.waitForCompleteAnswer(page);
 65 |       }, CONFIG.MAX_RETRIES);
 66 |     } catch (error) {
 67 |       logError("Search operation failed after all retries:", {
 68 |         error: error instanceof Error ? error.message : String(error),
 69 |       });
 70 |       const failureText = error instanceof Error ? error.message : undefined;
 71 |       // Checked in this order; the first category whose key occurs in the message wins.
 72 |       const categories = [
 73 |         { keys: ["detached", "Detached"], note: "Frame detachment detected, attempting recovery...",
 74 |           reply: "The search operation encountered a technical issue. Please try again with a more specific query." },
 75 |         { keys: ["timeout", "Timed out"], note: "Timeout detected, attempting recovery...",
 76 |           reply: "The search operation is taking longer than expected. This might be due to high server load. Your query has been submitted and we're waiting for results. Please try again with a more specific query if needed." },
 77 |         { keys: ["navigation", "Navigation"], note: "Navigation error detected, attempting recovery...",
 78 |           reply: "The search operation encountered a navigation issue. This might be due to network connectivity problems. Please try again later." },
 79 |       ];
 80 |       const hit = failureText === undefined
 81 |         ? undefined
 82 |         : categories.find(({ keys }) => keys.some((key) => failureText.includes(key)));
 83 |       if (hit) {
 84 |         logError(hit.note);
 85 |         try {
 86 |           await this.browserManager.performRecovery();
 87 |         } catch (recoveryError) {
 88 |           // A failed recovery must not turn the friendly reply into a rejection.
 89 |           logError("Recovery after search failure failed:", {
 90 |             error: recoveryError instanceof Error ? recoveryError.message : String(recoveryError),
 91 |           });
 92 |         }
 93 |         return hit.reply;
 94 |       }
 95 |       return `The search operation could not be completed. Error: ${failureText ?? 'Unknown error'}. Please try again later with a more specific query.`;
 96 |     } finally {
 97 |       clearTimeout(operationTimeout);
 98 |     }
 99 |   }
100 | 
101 |   /** Clears the input at `selector`, types `query` into it and presses Enter. */
102 |   private async executeSearch(page: Page, selector: string, query: string): Promise<void> {
103 |     const preview = query.length > 50 ? `${query.substring(0, 50)}...` : query;
104 |     logInfo(`Executing search for: "${preview}"`);
105 | 
106 |     try {
107 |       // Best effort: blank the field in the DOM, then triple-click and delete.
108 |       await page.evaluate((inputSelector) => {
109 |         const field = document.querySelector(inputSelector) as HTMLTextAreaElement;
110 |         if (field) {
111 |           field.value = "";
112 |         }
113 |       }, selector);
114 |       await page.click(selector, { clickCount: 3 });
115 |       await page.keyboard.press("Backspace");
116 |     } catch (clearError) {
117 |       const reason = clearError instanceof Error ? clearError.message : String(clearError);
118 |       logWarn("Error clearing input field:", { error: reason });
119 |     }
120 | 
121 |     // Human-like keystroke delay of 20-39 ms, chosen once per call; timing only, not security.
122 |     const keystrokeDelayMs = 20 + Math.floor(Math.random() * 20);
123 |     await page.type(selector, query, { delay: keystrokeDelayMs });
124 |     await page.keyboard.press("Enter");
125 |     logInfo("Search query submitted successfully");
126 |   }
127 | 
128 |   /**
129 |    * Waits for an answer container to become visible, then returns the answer
130 |    * text. When no container appears but the page body holds more than 200
131 |    * characters of text, the fallback extractor is used instead.
132 |    * @throws Error if the page is detached or nothing usable shows up.
133 |    */
134 |   private async waitForCompleteAnswer(page: Page): Promise<string> {
135 |     logInfo("Waiting for search response...");
136 | 
137 |     // Candidates are tried one after another, each with the full selector timeout.
138 |     const candidates = [".prose", '[class*="prose"]', '[class*="answer"]', '[class*="result"]'];
139 |     let responseVisible = false;
140 |     for (const candidate of candidates) {
141 |       try {
142 |         await page.waitForSelector(candidate, {
143 |           timeout: CONFIG.SELECTOR_TIMEOUT,
144 |           visible: true,
145 |         });
146 |         logInfo(`Found response with selector: ${candidate}`);
147 |         responseVisible = true;
148 |         break;
149 |       } catch (selectorError) {
150 |         logWarn(`Selector ${candidate} not found, trying next...`);
151 |       }
152 |     }
153 | 
154 |     if (responseVisible) {
155 |       const answer = await this.extractCompleteAnswer(page);
156 |       logInfo(`Answer received (${answer.length} characters)`);
157 |       return answer;
158 |     }
159 | 
160 |     // No container showed up: make sure the page is still usable, then capture it.
161 |     logError("No response selectors found, checking page state...");
162 |     if (!page || page.mainFrame().isDetached()) {
163 |       throw new Error("Page became invalid while waiting for response");
164 |     }
165 |     await page.screenshot({ path: "debug_prose_not_found.png", fullPage: true });
166 | 
167 |     // Only attempt the fallback when the body carries a meaningful amount of text.
168 |     const bodyText = await page.evaluate(() => document.body.innerText);
169 |     if (!bodyText || bodyText.length <= 200) {
170 |       throw new Error("Timed out waiting for response from Perplexity");
171 |     }
172 |     logInfo("Found text content on page, attempting to extract answer...");
173 |     return await this.extractFallbackAnswer(page);
174 |   }
175 | 
176 |   /**
177 |    * Polls the page (every 600 ms, at most 60 times) until the `.prose` answer text
178 |    * looks stable, and returns it followed by a "URLs:" list when it contains links.
179 |    *
180 |    * The in-page polling is raced against CONFIG.ANSWER_WAIT_TIMEOUT. When the race
181 |    * fails, the `.prose` text currently on the page is returned with a partial-response
182 |    * note (if longer than 50 characters); otherwise a fixed retry message is returned.
183 |    */
184 |   private async extractCompleteAnswer(page: Page): Promise<string> {
185 |     // Keep the timer handle so it can be cancelled once the race settles; without
186 |     // that, the timer stays pending for the whole timeout after every answer.
187 |     let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
188 |     const timeoutPromise = new Promise<string>((_, reject) => {
189 |       timeoutHandle = setTimeout(() => {
190 |         reject(new Error('Waiting for complete answer timed out'));
191 |       }, CONFIG.ANSWER_WAIT_TIMEOUT);
192 |     });
193 | 
194 |     // The callback below is evaluated inside the browser page.
195 |     const answerPromise = page.evaluate(async () => {
196 |       // Security: URL scheme blocklist for preventing code injection attacks
197 |       const BLOCKED_URL_SCHEMES = [
198 |         "java" + "script:", // Prevents eval-like code execution
199 |         "data:", // Prevents data URI attacks
200 |         "vbs" + "cript:", // Prevents VBScript execution
201 |         "#", // Prevents anchor-only URLs
202 |       ];
203 |       const isSafeUrl = (href: string): boolean =>
204 |         Boolean(href) && !BLOCKED_URL_SCHEMES.some((scheme) => href.startsWith(scheme));
205 | 
206 |       const getAnswer = () => {
207 |         const elements = Array.from(document.querySelectorAll(".prose"));
208 |         const answerText = elements.map((el) => (el as HTMLElement).innerText.trim()).join("\n\n");
209 |         // Extract all URLs from the answer
210 |         const links = Array.from(document.querySelectorAll(".prose a[href]"));
211 |         const urls = links.map(link => (link as HTMLAnchorElement).href)
212 |           .filter(isSafeUrl)
213 |           .map(href => href.trim());
214 |         // Combine text and URLs
215 |         if (urls.length > 0) {
216 |           return `${answerText}\n\nURLs:\n${urls.map(url => `- ${url}`).join('\n')}`;
217 |         }
218 |         return answerText;
219 |       };
220 | 
221 |       let lastAnswer = '';
222 |       let lastLength = 0;
223 |       let stabilityCounter = 0; // consecutive polls with identical content
224 |       let noChangeCounter = 0; // polls since the content last grew
225 |       const maxAttempts = 60; // upper bound on the number of polls
226 |       const checkInterval = 600; // milliseconds between polls
227 | 
228 |       for (let i = 0; i < maxAttempts; i++) {
229 |         await new Promise((resolve) => setTimeout(resolve, checkInterval));
230 |         const currentAnswer = getAnswer();
231 |         const currentLength = currentAnswer.length;
232 | 
233 |         if (currentLength > 0) {
234 |           if (currentLength > lastLength) {
235 |             lastLength = currentLength;
236 |             stabilityCounter = 0;
237 |             noChangeCounter = 0;
238 |           } else if (currentAnswer === lastAnswer) {
239 |             stabilityCounter++;
240 |             noChangeCounter++;
241 |             // Longer answers need fewer identical polls to count as finished.
242 |             if (currentLength > 1000 && stabilityCounter >= 3) {
243 |               console.log('Long answer stabilized, exiting early');
244 |               break;
245 |             } else if (currentLength > 500 && stabilityCounter >= 4) {
246 |               console.log('Medium answer stabilized, exiting');
247 |               break;
248 |             } else if (stabilityCounter >= 5) {
249 |               console.log('Short answer stabilized, exiting');
250 |               break;
251 |             }
252 |           } else {
253 |             noChangeCounter++;
254 |             stabilityCounter = 0;
255 |           }
256 |           lastAnswer = currentAnswer;
257 | 
258 |           if (noChangeCounter >= 10 && currentLength > 200) {
259 |             console.log('Content stopped growing but has sufficient information');
260 |             break;
261 |           }
262 |         }
263 | 
264 |         const lastProse = document.querySelector('.prose:last-child');
265 |         const isComplete = lastProse?.textContent?.includes('.') ||
266 |           lastProse?.textContent?.includes('?') ||
267 |           lastProse?.textContent?.includes('!');
268 | 
269 |         if (isComplete && stabilityCounter >= 2 && currentLength > 100) {
270 |           console.log('Completion indicators found, exiting');
271 |           break;
272 |         }
273 |       }
274 |       return lastAnswer || 'No answer content found. The website may be experiencing issues.';
275 |     });
276 | 
277 |     try {
278 |       // Race between the answer generation and the timeout
279 |       return await Promise.race([answerPromise, timeoutPromise]);
280 |     } catch (error) {
281 |       logError("Error waiting for complete answer:", {
282 |         error: error instanceof Error ? error.message : String(error),
283 |       });
284 |       // Return partial answer if available
285 |       try {
286 |         // Make multiple attempts to get partial content
287 |         for (let attempt = 0; attempt < 3; attempt++) {
288 |           try {
289 |             const partialAnswer = await page.evaluate(() => {
290 |               const elements = Array.from(document.querySelectorAll('.prose'));
291 |               return elements.map((el) => (el as HTMLElement).innerText.trim()).join('\n\n');
292 |             });
293 |             if (partialAnswer && partialAnswer.length > 50) {
294 |               return partialAnswer + '\n\n[Note: Answer retrieval was interrupted. This is a partial response.]';
295 |             }
296 |             // Wait briefly before trying again
297 |             await new Promise(resolve => setTimeout(resolve, 1000));
298 |           } catch (evalError) {
299 |             logError(`Attempt ${attempt + 1} to get partial answer failed:`, {
300 |               error: evalError instanceof Error ? evalError.message : String(evalError),
301 |             });
302 |             // Wait before retrying
303 |             await new Promise(resolve => setTimeout(resolve, 1000));
304 |           }
305 |         }
306 |         return 'Answer retrieval timed out. The service might be experiencing high load. Please try again with a more specific query.';
307 |       } catch (e) {
308 |         logError("Failed to retrieve partial answer:", {
309 |           error: e instanceof Error ? e.message : String(e),
310 |         });
311 |         return 'Answer retrieval timed out. Please try again later.';
312 |       }
313 |     } finally {
314 |       clearTimeout(timeoutHandle);
315 |     }
316 |   }
317 | 
318 |   /**
319 |    * Fallback used when the usual answer selectors are missing. Returns the text
320 |    * of up to three of the longest elements matched by the first generic content
321 |    * selector that yields any element with more than 100 characters of text.
322 |    * Returns a fixed message if the in-page evaluation throws.
323 |    */
324 |   private async extractFallbackAnswer(page: Page): Promise<string> {
325 |     try {
326 |       return await page.evaluate(() => {
327 |         const MIN_TEXT_LENGTH = 100;
328 |         const MAX_BLOCKS = 3;
329 | 
330 |         // Tried in order: content containers, text containers, then any non-empty div.
331 |         const contentSelectors = [
332 |           'main', 'article', '.content', '.answer', '.result',
333 |           'p', 'div > p', '.text', '[class*="text"]',
334 |           'div:not(:empty)'
335 |         ];
336 |         const visibleText = (node: Element): string => (node as HTMLElement).innerText;
337 | 
338 |         for (const selector of contentSelectors) {
339 |           const substantial = Array.from(document.querySelectorAll(selector))
340 |             .filter((node) => visibleText(node).trim().length > MIN_TEXT_LENGTH);
341 |           if (substantial.length === 0) {
342 |             continue;
343 |           }
344 |           // Longest first, then keep only the leading few.
345 |           return substantial
346 |             .sort((left, right) => visibleText(right).length - visibleText(left).length)
347 |             .slice(0, MAX_BLOCKS)
348 |             .map((node) => visibleText(node).trim())
349 |             .join('\n\n');
350 |         }
351 | 
352 |         // Nothing matched: hand back the start of the body text plus a note.
353 |         const bodyExcerpt = document.body.innerText.substring(0, 2000);
354 |         return bodyExcerpt + '\n\n[Note: Content extraction used fallback method due to page structure changes]';
355 |       });
356 |     } catch (error) {
357 |       const reason = error instanceof Error ? error.message : String(error);
358 |       logError("Error in fallback answer extraction:", { error: reason });
359 |       return 'Unable to extract answer content. The website structure may have changed.';
360 |     }
361 |   }
362 | 
363 |   /**
364 |    * Translates an error into one of the user-facing failure messages.
365 |    * Categories are tested in order: timeout, navigation, detached frame;
366 |    * anything else yields a generic message that embeds the error text.
367 |    * NOTE(review): no call site for this helper is visible in this class.
368 |    */
369 |   private generateErrorResponse(error: unknown): string {
370 |     const errorMessage = error instanceof Error ? error.message : String(error);
371 | 
372 |     const cannedReplies: Array<[string[], string]> = [
373 |       [["timeout", "Timed out"], "The search operation is taking longer than expected. This might be due to high server load. Please try again with a more specific query."],
374 |       [["navigation", "Navigation"], "The search operation encountered a navigation issue. This might be due to network connectivity problems. Please try again later."],
375 |       [["detached", "Detached"], "The search operation encountered a technical issue. Please try again with a more specific query."],
376 |     ];
377 |     const match = cannedReplies.find(([needles]) => needles.some((needle) => errorMessage.includes(needle)));
378 |     return match ? match[1] : `The search operation could not be completed. Error: ${errorMessage}. Please try again later with a more specific query.`;
379 |   }
380 | }
381 | 


--------------------------------------------------------------------------------
/src/__tests__/unit/tools.test.ts:
--------------------------------------------------------------------------------
  1 | import { beforeEach, describe, expect, it, vi } from "vitest";
  2 | import type { PuppeteerContext } from "../../types/browser.js";
  3 | import type { PageContentResult } from "../../types/browser.js";
  4 | import type { ChatMessage } from "../../types/database.js";
  5 | 
  6 | // Third-party modules (Puppeteer, Mozilla Readability, JSDOM) are swapped
  7 | // for bare spies.
  8 | vi.mock("puppeteer", () => {
  9 |   const launch = vi.fn();
 10 |   return { default: { launch } };
 11 | });
 12 | 
 13 | vi.mock("@mozilla/readability", () => {
 14 |   const Readability = vi.fn();
 15 |   return { Readability };
 16 | });
 17 | 
 18 | vi.mock("jsdom", () => {
 19 |   const JSDOM = vi.fn();
 20 |   return { JSDOM };
 21 | });
 22 | 
 23 | // Mock logging: each helper is replaced by its own spy, so the real
 24 | // logging implementation is not used while this suite runs.
 25 | vi.mock("../../utils/logging.js", () => {
 26 |   const [logInfo, logWarn, logError] = [vi.fn(), vi.fn(), vi.fn()];
 27 |   return { logInfo, logWarn, logError };
 28 | });
 29 | 
 30 | // Mock database utilities; the wrappers pass every argument on to the shared spies.
 31 | const mockGetChatHistory = vi.fn();
 32 | const mockSaveChatMessage = vi.fn();
 33 | vi.mock("../../utils/db.js", () => ({
 34 |   initializeDatabase: vi.fn(),
 35 |   getChatHistory: (...args: unknown[]) => mockGetChatHistory(...args),
 36 |   saveChatMessage: (...args: unknown[]) => mockSaveChatMessage(...args),
 37 | }));
 38 | 
 39 | // Mock extraction utilities: the three listed exports are replaced with
 40 | // bare spies.
 41 | vi.mock("../../utils/extraction.js", () => {
 42 |   const [fetchSinglePageContent, recursiveFetch, extractSameDomainLinks] = [vi.fn(), vi.fn(), vi.fn()];
 43 |   return { fetchSinglePageContent, recursiveFetch, extractSameDomainLinks };
 44 | });
 45 | 
 46 | // Mock fetch utilities with bare spies.
 47 | vi.mock("../../utils/fetch.js", () => {
 48 |   const [fetchWithTimeout, fetchSimpleContent] = [vi.fn(), vi.fn()];
 49 |   return { fetchWithTimeout, fetchSimpleContent };
 50 | });
 51 | 
 52 | // Mock puppeteer-logic utilities with a bare spy.
 53 | vi.mock("../../utils/puppeteer-logic.js", () => {
 54 |   return { isValidUrlForBrowser: vi.fn() };
 55 | });
 56 | 
 57 | // PuppeteerContext double: inert state fields first, then one spy per callback.
 58 | const mockCtx: PuppeteerContext = {
 59 |   browser: null,
 60 |   page: null,
 61 |   idleTimeout: null,
 62 |   isInitializing: false,
 63 |   lastSearchTime: 0,
 64 |   operationCount: 0,
 65 |   searchInputSelector: 'textarea[placeholder*="Ask"]',
 66 |   IDLE_TIMEOUT_MS: 5 * 60 * 1000,
 67 |   log: vi.fn(),
 68 |   setBrowser: vi.fn(),
 69 |   setPage: vi.fn(),
 70 |   setIsInitializing: vi.fn(),
 71 |   setSearchInputSelector: vi.fn(),
 72 |   setIdleTimeout: vi.fn(),
 73 |   incrementOperationCount: vi.fn(),
 74 |   determineRecoveryLevel: vi.fn(),
 75 | };
 76 | 
 77 | describe("Tools", () => {
 78 |   beforeEach(() => {
 79 |     vi.clearAllMocks();
 80 |   });
 81 | 
 82 |   describe("chatPerplexity", () => {
 83 |     it("should handle basic chat functionality with new chat_id", async () => {
 84 |       const { default: chatPerplexity } = await import("../../tools/chatPerplexity.js");
 85 | 
 86 |       mockGetChatHistory.mockReturnValue([]);
 87 |       const mockPerformSearch = vi.fn().mockResolvedValue("Mock response");
 88 | 
 89 |       const args = { message: "Hello, world!" };
 90 |       const result = await chatPerplexity(
 91 |         args,
 92 |         mockCtx,
 93 |         mockPerformSearch,
 94 |         mockGetChatHistory,
 95 |         mockSaveChatMessage,
 96 |       );
 97 | 
 98 |       expect(mockGetChatHistory).toHaveBeenCalled();
 99 |       expect(mockSaveChatMessage).toHaveBeenCalled();
100 |       expect(mockPerformSearch).toHaveBeenCalledWith(
101 |         expect.stringContaining("Hello, world!"),
102 |         mockCtx,
103 |       );
104 |       expect(result).toBe("Mock response");
105 |     });
106 | 
107 |     it("should handle chat with existing chat_id and history", async () => {
108 |       const { default: chatPerplexity } = await import("../../tools/chatPerplexity.js");
109 | 
110 |       mockGetChatHistory.mockReturnValue([
111 |         { role: "user", content: "Previous message" } as ChatMessage,
112 |         { role: "assistant", content: "Previous response" } as ChatMessage,
113 |       ]);
114 |       const mockPerformSearch = vi.fn().mockResolvedValue("New response");
115 | 
116 |       const args = { message: "New message", chat_id: "test-chat-id" };
117 |       const result = await chatPerplexity(
118 |         args,
119 |         mockCtx,
120 |         mockPerformSearch,
121 |         mockGetChatHistory,
122 |         mockSaveChatMessage,
123 |       );
124 | 
125 |       expect(mockGetChatHistory).toHaveBeenCalledWith("test-chat-id");
126 |       expect(mockPerformSearch).toHaveBeenCalledWith(
127 |         expect.stringContaining("Previous message"),
128 |         mockCtx,
129 |       );
130 |       expect(result).toBe("New response");
131 |     });
132 | 
133 |     it("should handle empty message gracefully", async () => {
134 |       const { default: chatPerplexity } = await import("../../tools/chatPerplexity.js");
135 | 
136 |       mockGetChatHistory.mockReturnValue([]);
137 |       const mockPerformSearch = vi.fn().mockResolvedValue("Response to empty message");
138 | 
139 |       const args = { message: "" };
140 |       const result = await chatPerplexity(
141 |         args,
142 |         mockCtx,
143 |         mockPerformSearch,
144 |         mockGetChatHistory,
145 |         mockSaveChatMessage,
146 |       );
147 | 
148 |       expect(mockPerformSearch).toHaveBeenCalled();
149 |       expect(result).toBe("Response to empty message");
150 |     });
151 |   });
152 | 
153 |   describe("search", () => {
154 |     it("should handle normal detail level search", async () => {
155 |       const { default: search } = await import("../../tools/search.js");
156 | 
157 |       const mockPerformSearch = vi.fn().mockResolvedValue("Normal search result");
158 | 
159 |       const args = { query: "test query", detail_level: "normal" as const };
160 |       const result = await search(args, mockCtx, mockPerformSearch);
161 | 
162 |       expect(mockPerformSearch).toHaveBeenCalledWith(
163 |         expect.stringContaining("Provide a clear, balanced answer to: test query"),
164 |         mockCtx,
165 |       );
166 |       expect(result).toBe("Normal search result");
167 |     });
168 | 
169 |     it("should handle brief detail level search", async () => {
170 |       const { default: search } = await import("../../tools/search.js");
171 | 
172 |       const mockPerformSearch = vi.fn().mockResolvedValue("Brief search result");
173 | 
174 |       const args = { query: "test query", detail_level: "brief" as const };
175 |       const result = await search(args, mockCtx, mockPerformSearch);
176 | 
177 |       expect(mockPerformSearch).toHaveBeenCalledWith(
178 |         expect.stringContaining("Provide a brief, concise answer to: test query"),
179 |         mockCtx,
180 |       );
181 |       expect(result).toBe("Brief search result");
182 |     });
183 | 
184 |     it("should handle detailed detail level search", async () => {
185 |       const { default: search } = await import("../../tools/search.js");
186 | 
187 |       const mockPerformSearch = vi.fn().mockResolvedValue("Detailed search result");
188 | 
189 |       const args = { query: "test query", detail_level: "detailed" as const };
190 |       const result = await search(args, mockCtx, mockPerformSearch);
191 | 
192 |       expect(mockPerformSearch).toHaveBeenCalledWith(
193 |         expect.stringContaining("Provide a comprehensive, detailed analysis of: test query"),
194 |         mockCtx,
195 |       );
196 |       expect(result).toBe("Detailed search result");
197 |     });
198 | 
199 |     it("should handle streaming search", async () => {
200 |       const { default: search } = await import("../../tools/search.js");
201 | 
202 |       const mockPerformSearch = vi.fn().mockResolvedValue("Streaming search result");
203 | 
204 |       const args = { query: "test query", stream: true };
205 |       const result = await search(args, mockCtx, mockPerformSearch);
206 | 
207 |       // Should return a generator for streaming
208 |       expect(typeof result).toBe("object");
209 |       expect(result).toHaveProperty("next");
210 |     });
211 | 
212 |     it("should handle search with default parameters", async () => {
213 |       const { default: search } = await import("../../tools/search.js");
214 | 
215 |       const mockPerformSearch = vi.fn().mockResolvedValue("Default search result");
216 | 
217 |       const args = { query: "test query" };
218 |       const result = await search(args, mockCtx, mockPerformSearch);
219 | 
220 |       expect(mockPerformSearch).toHaveBeenCalledWith(
221 |         expect.stringContaining("Provide a clear, balanced answer to: test query"),
222 |         mockCtx,
223 |       );
224 |       expect(result).toBe("Default search result");
225 |     });
226 |   });
227 | 
228 |   describe("extractUrlContent", () => {
229 |     it("should handle single page extraction", async () => {
230 |       const { default: extractUrlContent } = await import("../../tools/extractUrlContent.js");
231 | 
232 |       const mockResult: PageContentResult = {
233 |         url: "https://example.com",
234 |         title: "Example Page",
235 |         textContent: "Example content",
236 |         error: null,
237 |       };
238 | 
239 |       const { fetchSinglePageContent } = await import("../../utils/extraction.js");
240 |       vi.mocked(fetchSinglePageContent).mockResolvedValue(mockResult);
241 | 
242 |       const args = { url: "https://example.com", depth: 1 };
243 |       const result = await extractUrlContent(args, mockCtx);
244 | 
245 |       // For depth=1, it should return the result directly as JSON
246 |       const parsedResult = JSON.parse(result);
247 |       expect(parsedResult.url).toBe("https://example.com");
248 |       expect(parsedResult.textContent).toBe("Example content");
249 |     });
250 | 
251 |     it("should handle recursive extraction with depth > 1", async () => {
252 |       const { default: extractUrlContent } = await import("../../tools/extractUrlContent.js");
253 | 
254 |       const mockResults: PageContentResult[] = [
255 |         {
256 |           url: "https://example.com",
257 |           title: "Example Page",
258 |           textContent: "Example content",
259 |           error: null,
260 |         },
261 |       ];
262 | 
263 |       const { recursiveFetch } = await import("../../utils/extraction.js");
264 |       vi.mocked(recursiveFetch).mockImplementation(async (_, __, ___, ____, results) => {
265 |         results.push(...mockResults);
266 |       });
267 | 
268 |       const args = { url: "https://example.com", depth: 2 };
269 |       const result = await extractUrlContent(args, mockCtx);
270 | 
271 |       const parsedResult = JSON.parse(result);
272 |       expect(parsedResult.explorationDepth).toBe(2);
273 |       expect(parsedResult.pagesExplored).toBe(1);
274 |       expect(parsedResult.rootUrl).toBe("https://example.com");
275 |     });
276 | 
277 |     it("should handle GitHub URL rewriting", async () => {
278 |       const { default: extractUrlContent } = await import("../../tools/extractUrlContent.js");
279 | 
280 |       const mockResult: PageContentResult = {
281 |         url: "https://github.com/user/repo",
282 |         title: "GitHub Repository",
283 |         textContent: "Repository content",
284 |         error: null,
285 |       };
286 | 
287 |       const { fetchSinglePageContent } = await import("../../utils/extraction.js");
288 |       vi.mocked(fetchSinglePageContent).mockResolvedValue(mockResult);
289 | 
290 |       const args = { url: "https://github.com/user/repo", depth: 1 };
291 |       const result = await extractUrlContent(args, mockCtx);
292 | 
293 |       // For GitHub URLs with depth=1, it should still return the result directly
294 |       const parsedResult = JSON.parse(result);
295 |       expect(parsedResult.url).toBe("https://github.com/user/repo");
296 |       expect(parsedResult.textContent).toBe("Repository content");
297 |     });
298 | 
299 |     it("should handle extraction errors gracefully", async () => {
300 |       const { default: extractUrlContent } = await import("../../tools/extractUrlContent.js");
301 | 
302 |       const { fetchSinglePageContent } = await import("../../utils/extraction.js");
303 |       // Mock fetchSinglePageContent to return an error result, not throw
304 |       vi.mocked(fetchSinglePageContent).mockResolvedValue({
305 |         url: "https://invalid-url.com",
306 |         error: "Network error",
307 |       });
308 | 
309 |       const args = { url: "https://invalid-url.com", depth: 1 };
310 | 
311 |       // The function should catch the error and return it in the result, not throw
312 |       const result = await extractUrlContent(args, mockCtx);
313 | 
314 |       // For depth=1, errors should be returned in the result object
315 |       const parsedResult = JSON.parse(result);
316 |       expect(parsedResult.error).toContain("Network error");
317 |     });
318 | 
319 |     it("should validate depth parameter boundaries", async () => {
320 |       const { default: extractUrlContent } = await import("../../tools/extractUrlContent.js");
321 | 
322 |       const mockResult: PageContentResult = {
323 |         url: "https://example.com",
324 |         title: "Example Page",
325 |         textContent: "Example content",
326 |         error: null,
327 |       };
328 | 
329 |       const { fetchSinglePageContent } = await import("../../utils/extraction.js");
330 |       vi.mocked(fetchSinglePageContent).mockResolvedValue(mockResult);
331 | 
332 |       // Test depth clamping - should be max 5
333 |       const args = { url: "https://example.com", depth: 10 };
334 |       const result = await extractUrlContent(args, mockCtx);
335 | 
336 |       // For depth > 1, it should return the formatted result object
337 |       const parsedResult = JSON.parse(result);
338 |       expect(parsedResult.explorationDepth).toBe(5); // Max depth should be 5
339 |     });
340 |   });
341 | 
342 |   describe("getDocumentation", () => {
343 |     it("should handle basic documentation query", async () => {
344 |       const { default: getDocumentation } = await import("../../tools/getDocumentation.js");
345 | 
346 |       const mockPerformSearch = vi.fn().mockResolvedValue("Documentation result");
347 | 
348 |       const args = { query: "React hooks" };
349 |       const result = await getDocumentation(args, mockCtx, mockPerformSearch);
350 | 
351 |       expect(mockPerformSearch).toHaveBeenCalledWith(
352 |         expect.stringContaining(
353 |           "Provide comprehensive documentation and usage examples for React hooks",
354 |         ),
355 |         mockCtx,
356 |       );
357 |       expect(result).toBe("Documentation result");
358 |     });
359 | 
360 |     it("should handle documentation query with context", async () => {
361 |       const { default: getDocumentation } = await import("../../tools/getDocumentation.js");
362 | 
363 |       const mockPerformSearch = vi.fn().mockResolvedValue("Documentation with context result");
364 | 
365 |       const args = { query: "React hooks", context: "focus on performance optimization" };
366 |       const result = await getDocumentation(args, mockCtx, mockPerformSearch);
367 | 
368 |       expect(mockPerformSearch).toHaveBeenCalledWith(
369 |         expect.stringContaining("Focus on: focus on performance optimization"),
370 |         mockCtx,
371 |       );
372 |       expect(result).toBe("Documentation with context result");
373 |     });
374 |   });
375 | 
376 |   describe("findApis", () => {
377 |     it("should handle API discovery query", async () => {
378 |       const { default: findApis } = await import("../../tools/findApis.js");
379 | 
380 |       const mockPerformSearch = vi.fn().mockResolvedValue("API discovery result");
381 | 
382 |       const args = { requirement: "image recognition" };
383 |       const result = await findApis(args, mockCtx, mockPerformSearch);
384 | 
385 |       expect(mockPerformSearch).toHaveBeenCalledWith(
386 |         expect.stringContaining("Find and evaluate APIs that could be used for: image recognition"),
387 |         mockCtx,
388 |       );
389 |       expect(result).toBe("API discovery result");
390 |     });
391 | 
392 |     it("should handle API discovery with context", async () => {
393 |       const { default: findApis } = await import("../../tools/findApis.js");
394 | 
395 |       const mockPerformSearch = vi.fn().mockResolvedValue("API discovery with context result");
396 | 
397 |       const args = { requirement: "payment processing", context: "prefer free tier options" };
398 |       const result = await findApis(args, mockCtx, mockPerformSearch);
399 | 
400 |       expect(mockPerformSearch).toHaveBeenCalledWith(
401 |         expect.stringContaining("Context: prefer free tier options"),
402 |         mockCtx,
403 |       );
404 |       expect(result).toBe("API discovery with context result");
405 |     });
406 |   });
407 | 
408 |   describe("checkDeprecatedCode", () => {
409 |     it("should handle deprecated code checking", async () => {
410 |       const { default: checkDeprecatedCode } = await import("../../tools/checkDeprecatedCode.js");
411 | 
412 |       const mockPerformSearch = vi.fn().mockResolvedValue("Deprecation check result");
413 | 
414 |       const args = { code: "componentWillMount()" };
415 |       const result = await checkDeprecatedCode(args, mockCtx, mockPerformSearch);
416 | 
417 |       expect(mockPerformSearch).toHaveBeenCalledWith(
418 |         expect.stringContaining("componentWillMount()"),
419 |         mockCtx,
420 |       );
421 |       expect(result).toBe("Deprecation check result");
422 |     });
423 | 
424 |     it("should handle deprecated code checking with technology context", async () => {
425 |       const { default: checkDeprecatedCode } = await import("../../tools/checkDeprecatedCode.js");
426 | 
427 |       const mockPerformSearch = vi
428 |         .fn()
429 |         .mockResolvedValue("Deprecation check with tech context result");
430 | 
431 |       const args = { code: "var instead of let/const", technology: "React 16" };
432 |       const result = await checkDeprecatedCode(args, mockCtx, mockPerformSearch);
433 | 
434 |       expect(mockPerformSearch).toHaveBeenCalledWith(
435 |         expect.stringContaining("var instead of let/const"),
436 |         mockCtx,
437 |       );
438 |       expect(mockPerformSearch).toHaveBeenCalledWith(expect.stringContaining("React 16"), mockCtx);
439 |       expect(result).toBe("Deprecation check with tech context result");
440 |     });
441 |   });
442 | });
443 | 


--------------------------------------------------------------------------------