├── .nvmrc
├── docs
    ├── apify-logo.png
    ├── mcp-clients.png
    ├── apify-mcp-server.png
    ├── claude-desktop.png
    ├── apify_mcp_server_dark_background.png
    ├── apify_mcp_server_white_background.png
    └── apify-logo.svg
├── src
    ├── errors.ts
    ├── tsconfig.json
    ├── index.ts
    ├── prompts
    │   ├── index.ts
    │   └── latest-news-on-topic.ts
    ├── actor
    │   ├── README.md
    │   ├── types.ts
    │   ├── const.ts
    │   └── utils.ts
    ├── utils
    │   ├── version.ts
    │   ├── html-to-md.ts
    │   ├── mcp-clients.ts
    │   ├── userid-cache.ts
    │   ├── tool-status.ts
    │   ├── ajv.ts
    │   ├── mcp.ts
    │   ├── actor-details.ts
    │   ├── html.ts
    │   ├── logging.ts
    │   ├── tools.ts
    │   ├── ttl-lru.ts
    │   ├── actor-response.ts
    │   ├── schema-generation.ts
    │   ├── progress.ts
    │   ├── generic.ts
    │   ├── actor.ts
    │   └── apify-docs.ts
    ├── mcp
    │   ├── const.ts
    │   ├── proxy.ts
    │   ├── actors.ts
    │   ├── utils.ts
    │   └── client.ts
    ├── state.ts
    ├── index-internals.ts
    ├── main.ts
    ├── tools
    │   ├── run_collection.ts
    │   ├── key_value_store_collection.ts
    │   ├── dataset_collection.ts
    │   ├── helpers.ts
    │   ├── index.ts
    │   ├── fetch-actor-details.ts
    │   ├── build.ts
    │   ├── run.ts
    │   ├── fetch-apify-docs.ts
    │   ├── key_value_store.ts
    │   ├── search-apify-docs.ts
    │   └── get-html-skeleton.ts
    ├── apify-client.ts
    ├── telemetry.ts
    └── input.ts
├── glama.json
├── .npmignore
├── .env.example
├── tsconfig.eslint.json
├── .dockerignore
├── tsconfig.json
├── tests
    ├── integration
    │   ├── stdio.test.ts
    │   ├── utils
    │   │   └── port.ts
    │   ├── actor.server-sse.test.ts
    │   ├── actor.server-streamable.test.ts
    │   └── internals.test.ts
    ├── const.ts
    ├── README.md
    └── unit
    │   ├── tools.actor.test.ts
    │   ├── utils.tool-status.test.ts
    │   ├── utils.ttl-lru.test.ts
    │   ├── mcp.utils.test.ts
    │   ├── utils.progress.test.ts
    │   ├── mcp.actors.test.ts
    │   ├── schema-generation.test.ts
    │   ├── telemetry.test.ts
    │   ├── utils.html.test.ts
    │   └── utils.actor.test.ts
├── vitest.config.ts
├── evals
    ├── tsconfig.json
    ├── eval-single.ts
    └── create-dataset.ts
├── .actor
    ├── actor.json
    ├── Dockerfile
    └── input_schema.json
├── smithery.yaml
├── .gitignore
├── Dockerfile
├── server.json
├── LICENSE.md
├── .github
    └── workflows
    │   ├── check.yaml
    │   ├── evaluations.yaml
    │   └── pre_release.yaml
├── .editorconfig
├── res
    ├── INDEX.md
    └── ALGOLIA.md
├── pyproject.toml
├── manifest.json
├── package.json
└── eslint.config.mjs


/.nvmrc:
--------------------------------------------------------------------------------
1 | 24
2 | 


--------------------------------------------------------------------------------
/docs/apify-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/apify-logo.png


--------------------------------------------------------------------------------
/docs/mcp-clients.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/mcp-clients.png


--------------------------------------------------------------------------------
/docs/apify-mcp-server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/apify-mcp-server.png


--------------------------------------------------------------------------------
/docs/claude-desktop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/claude-desktop.png


--------------------------------------------------------------------------------
/src/errors.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Error subclass whose `name` property is fixed to `'TimeoutError'`,
3 |  * so it can be told apart from a plain `Error` by name or via `instanceof`.
4 |  */
5 | export class TimeoutError extends Error {
6 |     override readonly name = 'TimeoutError';
7 | }
8 | 


--------------------------------------------------------------------------------
/glama.json:
--------------------------------------------------------------------------------
1 | {
2 |     "$schema": "https://glama.ai/mcp/schemas/server.json",
3 |     "maintainers": [ "jirispilka", "mq37" ]
4 | }
5 | 


--------------------------------------------------------------------------------
/docs/apify_mcp_server_dark_background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/apify_mcp_server_dark_background.png


--------------------------------------------------------------------------------
/docs/apify_mcp_server_white_background.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apify/apify-mcp-server/HEAD/docs/apify_mcp_server_white_background.png


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | # .npmignore
2 | # Exclude everything by default
3 | *
4 | 
5 | # Include specific files and folders
6 | !dist/
7 | !README.md
8 | !LICENSE
9 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | APIFY_TOKEN=
2 | 
3 | # EVALS
4 | PHOENIX_API_KEY=
5 | PHOENIX_HOST=
6 | 
7 | OPENROUTER_API_KEY=
8 | OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
9 | 


--------------------------------------------------------------------------------
/tsconfig.eslint.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "extends": "./tsconfig.json",
 3 |     "include": [
 4 |         "evals",
 5 |         "src",
 6 |         "test",
 7 |         "tests",
 8 |         "vitest.config.ts"
 9 |     ],
10 | }
11 | 


--------------------------------------------------------------------------------
/src/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "extends": "../tsconfig.json",
 3 |     "compilerOptions": {
 4 |         "rootDir": "./",
 5 |         "outDir": "../dist",
 6 |         "noEmit": false,
 7 |     },
 8 |     "include": [
 9 |         "./**/*"
10 |     ]
11 | }
12 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # configurations
 2 | .idea
 3 | 
 4 | # crawlee and apify storage folders
 5 | apify_storage
 6 | crawlee_storage
 7 | storage
 8 | 
 9 | # installed files
10 | node_modules
11 | 
12 | # git folder
13 | .git
14 | 
15 | # data
16 | data
17 | src/storage
18 | dist
19 | .env
20 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | /*
2 |  This file provides essential functions and tools for MCP servers, serving as a library.
3 |  The ActorsMcpServer should be the only class exported from the package
4 | */
5 | 
6 | import { ActorsMcpServer } from './mcp/server.js';
7 | 
8 | export { ActorsMcpServer };
9 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "extends": "@apify/tsconfig",
 3 |     "compilerOptions": {
 4 |         "module": "ES2022",
 5 |         "skipLibCheck": true,
 6 |         "noEmit": true,
 7 |     },
 8 |     "include": [
 9 |         "src/**/*",
10 |         "tests/**/*"
11 |     ]
12 | }
13 | 


--------------------------------------------------------------------------------
/src/prompts/index.ts:
--------------------------------------------------------------------------------
 1 | import type { PromptBase } from '../types.js';
 2 | import { latestNewsOnTopicPrompt } from './latest-news-on-topic.js';
 3 | 
 4 | /**
 5 |  * List of all enabled prompts.
 6 |  */
 7 | export const prompts: PromptBase[] = [
 8 |     latestNewsOnTopicPrompt,
 9 | ];
10 | 


--------------------------------------------------------------------------------
/src/actor/README.md:
--------------------------------------------------------------------------------
1 | # Actor
2 | 
3 | Code related to the Apify Actor called Actors-MCP-Server.
4 | This Actor will be deprecated in favor of Apify MCP Server; therefore, we are keeping it separate from the main codebase.
5 | 
6 | The only exception is the `src/main.ts` file that also belongs to the Actor.
7 | 


--------------------------------------------------------------------------------
/tests/integration/stdio.test.ts:
--------------------------------------------------------------------------------
1 | import { createMcpStdioClient } from '../helpers.js';
2 | import { createIntegrationTestsSuite } from './suite.js';
3 | 
4 | createIntegrationTestsSuite({
5 |     suiteName: 'MCP stdio',
6 |     transport: 'stdio',
7 |     createClientFn: createMcpStdioClient,
8 | });
9 | 


--------------------------------------------------------------------------------
/vitest.config.ts:
--------------------------------------------------------------------------------
 1 | // eslint-disable-next-line import/extensions
 2 | import { defineConfig } from 'vitest/config';
 3 | 
 4 | export default defineConfig({
 5 |     test: {
 6 |         globals: true,
 7 |         environment: 'node',
 8 |         include: ['tests/**/*.test.ts'],
 9 |         testTimeout: 120_000,
10 |     },
11 | });
12 | 


--------------------------------------------------------------------------------
/src/utils/version.ts:
--------------------------------------------------------------------------------
 1 | import { createRequire } from 'node:module';
 2 | 
 3 | const require = createRequire(import.meta.url);
 4 | const packageJson = require('../../package.json');
 5 | 
 6 | /**
 7 |  * Reads the `version` field of the package.json loaded at module scope.
 8 |  *
 9 |  * @returns The version string, or `null` when the field is missing or empty.
10 |  */
11 | export function getPackageVersion(): string | null {
12 |     const { version } = packageJson;
13 |     if (!version) return null;
14 |     return version;
15 | }
16 | 


--------------------------------------------------------------------------------
/src/mcp/const.ts:
--------------------------------------------------------------------------------
 1 | export const MAX_TOOL_NAME_LENGTH = 64;
 2 | export const SERVER_ID_LENGTH = 8;
 3 | export const EXTERNAL_TOOL_CALL_TIMEOUT_MSEC = 120_000; // 2 minutes
 4 | export const ACTORIZED_MCP_CONNECTION_TIMEOUT_MSEC = 30_000; // 30 seconds
 5 | 
 6 | /**
 7 |  * Maps a log level name to a numeric rank, from `debug` (0) up to `emergency` (7).
 8 |  * NOTE(review): presumably used to compare severities against a minimum level; confirm at call sites.
 9 |  */
10 | export const LOG_LEVEL_MAP: Record<string, number> = {
11 |     debug: 0,
12 |     info: 1,
13 |     notice: 2,
14 |     warning: 3,
15 |     error: 4,
16 |     critical: 5,
17 |     alert: 6,
18 |     emergency: 7,
19 | };
20 | 


--------------------------------------------------------------------------------
/evals/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "target": "ES2019",
 4 |         "module": "ES2022",
 5 |         "moduleResolution": "bundler",
 6 |         "lib": ["ESNext"],
 7 |         "strict": true,
 8 |         "esModuleInterop": true,
 9 |         "allowSyntheticDefaultImports": true,
10 |         "skipLibCheck": true,
11 |         "noEmit": true,
12 |         "types": ["vitest/globals"]
13 |     },
14 |     "include": [
15 |         "*.ts"
16 |     ]
17 | }
18 | 


--------------------------------------------------------------------------------
/.actor/actor.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "actorSpecification": 1,
 3 |   "name": "apify-mcp-server",
 4 |   "title": "Model Context Protocol Server for Apify Actors",
 5 |   "description": "Implementation of a Model Context Protocol (MCP) Server for Apify Actors that enables AI applications (and AI agents) to interact with Apify Actors",
 6 |   "version": "0.1",
 7 |   "input": "./input_schema.json",
 8 |   "readme": "./ACTOR.md",
 9 |   "dockerfile": "./Dockerfile",
10 |   "webServerMcpPath": "/sse"
11 | }
12 | 


--------------------------------------------------------------------------------
/src/actor/types.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Metadata about an Actor run.
 3 |  *
 4 |  * `finishedAt` is typed as `null` and `status` as the literal `'RUNNING'`, so a value
 5 |  * of this type can only describe a run that is still in progress.
 6 |  */
 7 | export type ActorRunData = {
 8 |     id?: string;
 9 |     actId?: string;
10 |     userId?: string;
11 |     startedAt?: string;
12 |     finishedAt: null;
13 |     status: 'RUNNING';
14 |     meta: {
15 |         origin?: string;
16 |     };
17 |     options: {
18 |         build?: string;
19 |         memoryMbytes?: string;
20 |     };
21 |     buildId?: string;
22 |     defaultKeyValueStoreId?: string;
23 |     defaultDatasetId?: string;
24 |     defaultRequestQueueId?: string;
25 |     buildNumber?: string;
26 |     containerUrl?: string;
27 |     standbyUrl?: string;
28 | };
29 | 


--------------------------------------------------------------------------------
/tests/const.ts:
--------------------------------------------------------------------------------
 1 | import { defaults } from '../src/const.js';
 2 | import { toolCategoriesEnabledByDefault } from '../src/tools/index.js';
 3 | import { actorNameToToolName } from '../src/tools/utils.js';
 4 | import { getExpectedToolNamesByCategories } from '../src/utils/tools.js';
 5 | 
 6 | export const ACTOR_PYTHON_EXAMPLE = 'apify/python-example';
 7 | export const ACTOR_MCP_SERVER_ACTOR_NAME = 'apify/actors-mcp-server';
 8 | export const DEFAULT_TOOL_NAMES = getExpectedToolNamesByCategories(toolCategoriesEnabledByDefault);
 9 | export const DEFAULT_ACTOR_NAMES = defaults.actors.map((tool) => actorNameToToolName(tool));
10 | 


--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required:
 9 |       - apifyToken
10 |     properties:
11 |       apifyToken:
12 |         type: string
13 |         description: The API token for accessing Apify's services.
14 |   commandFunction:
15 |     # A function that produces the CLI command to start the MCP on stdio.
16 |     |-
17 |     (config) => ({ command: 'node', args: ['dist/main.js'], env: { APIFY_TOKEN: config.apifyToken } })


--------------------------------------------------------------------------------
/docs/apify-logo.svg:
--------------------------------------------------------------------------------
1 | <svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path d="M18.3512 0H31.5152C31.7829 0 32 0.217074 32 0.484848V20.6025C32 21.0844 31.3733 21.2712 31.1094 20.868L17.9455 0.750323C17.7345 0.427859 17.9659 0 18.3512 0Z" fill="#246DFF"/>
3 | <path d="M13.6488 0H0.484848C0.217074 0 0 0.217074 0 0.484848V20.6025C0 21.0844 0.626717 21.2712 0.890559 20.868L14.0545 0.750323C14.2655 0.427859 14.0341 0 13.6488 0Z" fill="#20A34E"/>
4 | <path d="M15.7745 16.1069L0.820235 31.1736C0.51656 31.4796 0.733277 32 1.16436 32H30.848C31.2773 32 31.4948 31.4832 31.1947 31.1762L16.4653 16.1095C16.2761 15.916 15.9651 15.9148 15.7745 16.1069Z" fill="#F86606"/>
5 | </svg>


--------------------------------------------------------------------------------
/tests/integration/utils/port.ts:
--------------------------------------------------------------------------------
 1 | import { createServer } from 'node:net';
 2 | 
 3 | /**
 4 |  * Finds an available TCP port by binding a temporary server to port 0 so the OS assigns one.
 5 |  * Using an OS-assigned port avoids "address already in use" errors that make tests flaky.
 6 |  *
 7 |  * @returns A promise resolving to the port the OS assigned; it rejects if the server
 8 |  *          emits an error or does not report a numeric port.
 9 |  */
10 | export async function getAvailablePort(): Promise<number> {
11 |     return new Promise((resolve, reject) => {
12 |         const server = createServer();
13 |         // Attach the error handler before listening so a failed bind is always reported.
14 |         server.on('error', reject);
15 |         server.listen(0, () => {
16 |             const address = server.address();
17 |             // address() yields null when not listening and a string for pipe/Unix-socket servers.
18 |             if (address === null || typeof address === 'string') {
19 |                 server.close(() => reject(new Error('Unable to determine the assigned port')));
20 |                 return;
21 |             }
22 |             const { port } = address;
23 |             server.close(() => resolve(port));
24 |         });
25 |     });
26 | }
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # This file tells Git which files shouldn't be added to source control
 2 | 
 3 | .idea
 4 | .vscode
 5 | storage
 6 | apify_storage
 7 | crawlee_storage
 8 | node_modules
 9 | dist
10 | tsconfig.tsbuildinfo
11 | storage/*
12 | !storage/key_value_stores
13 | storage/key_value_stores/*
14 | !storage/key_value_stores/default
15 | storage/key_value_stores/default/*
16 | !storage/key_value_stores/default/INPUT.json
17 | 
18 | # Added by Apify CLI
19 | .venv
20 | .env
21 | 
22 | # Aider coding agent files
23 | .aider*
24 | 
25 | 
26 | # MCP registry private key
27 | key.pem
28 | 
29 | # Ignore MCP config for Opencode client
30 | opencode.json
31 | 
32 | # Python cache files
33 | __pycache__/
34 | *.pyc
35 | *.pyo
36 | *.pyd
37 | .Python
38 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: Build the project
 2 | FROM node:24-alpine AS builder
 3 | 
 4 | # Set working directory
 5 | WORKDIR /app
 6 | 
 7 | # Copy package files and install dependencies
 8 | COPY package.json package-lock.json ./
 9 | RUN npm install
10 | 
11 | # Copy source files
12 | COPY src ./src
13 | COPY tsconfig.json ./
14 | 
15 | # Build the project
16 | RUN npm run build
17 | 
18 | # Stage 2: Set up the runtime environment
19 | FROM node:24-alpine
20 | 
21 | # Set working directory
22 | WORKDIR /app
23 | 
24 | # Copy only the necessary files from the build stage
25 | COPY --from=builder /app/dist ./dist
26 | COPY package.json package-lock.json ./
27 | 
28 | # Install production dependencies only
29 | RUN npm ci --omit=dev
30 | 
31 | # Set the entry point for the container
32 | ENTRYPOINT ["node", "dist/stdio.js"]
33 | 


--------------------------------------------------------------------------------
/src/actor/const.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Constants for the Actor.
 3 |  */
 4 | export const HEADER_READINESS_PROBE = 'x-apify-container-server-readiness-probe';
 5 | 
 6 | export enum TransportType {
 7 |     HTTP = 'HTTP',
 8 |     SSE = 'SSE',
 9 | }
10 | 
11 | export enum Routes {
12 |     ROOT = '/',
13 |     MCP = '/mcp',
14 |     SSE = '/sse',
15 |     MESSAGE = '/message',
16 | }
17 | 
18 | /**
19 |  * Builds the plain-text usage hint listing the server's MCP, SSE and message endpoints.
20 |  *
21 |  * @param host - Base URL prepended verbatim to every route.
22 |  * @returns A multi-line help message.
23 |  */
24 | export const getHelpMessage = (host: string) => {
25 |     const tokenQuery = '?token=YOUR-APIFY-TOKEN';
26 |     const lines = [
27 |         'To interact with the server you can either:',
28 |         `- send request to ${host}${Routes.MCP}${tokenQuery} and receive a response`,
29 |         'or',
30 |         `- connect for Server-Sent Events (SSE) via GET request to: ${host}${Routes.SSE}${tokenQuery}`,
31 |         `- send messages via POST request to: ${host}${Routes.MESSAGE}${tokenQuery}`,
32 |         '  (Include your message content in the request body.)',
33 |     ];
34 |     return lines.join('\n');
35 | };
36 | 


--------------------------------------------------------------------------------
/server.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json",
 3 |   "name": "com.apify/apify-mcp-server",
 4 |   "description": "Extract data from any website with thousands of scrapers, crawlers, and automations on Apify Store ⚡",
 5 |   "status": "active",
 6 |   "repository": {
 7 |     "url": "https://github.com/apify/apify-mcp-server",
 8 |     "source": "github"
 9 |   },
10 |   "version": "0.6.5",
11 |   "remotes": [
12 |     {
13 |       "type": "streamable-http",
14 |       "url": "https://mcp.apify.com/",
15 |       "headers": [
16 |         {
17 |           "name": "Authorization",
18 |           "description": "Apify API token for authentication with Apify platform services. For example 'Bearer <apify-api-token>'",
19 |           "is_required": true,
20 |           "is_secret": true
21 |         }
22 |       ]
23 |     }
24 |   ]
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Tests
 2 | 
 3 | This directory contains **unit** and **integration** tests for the `actors-mcp-server` project.
 4 | 
 5 | # Unit Tests
 6 | 
 7 | Unit tests are located in the `tests/unit` directory.
 8 | 
 9 | To run the unit tests, you can use the following command:
10 | ```bash
11 | npm run test:unit
12 | ```
13 | 
14 | # Integration Tests
15 | 
16 | Integration tests are located in the `tests/integration` directory.
17 | In order to run the integration tests, you need to have the `APIFY_TOKEN` environment variable set.
18 | Also, the following Actors need to exist on the target Apify platform:
19 | ```
20 | ALL DEFAULT ONES DEFINED IN src/const.ts AND ALSO EXPLICITLY:
21 | apify/rag-web-browser
22 | apify/instagram-scraper
23 | apify/python-example
24 | ```
25 | 
26 | To run the integration tests, you can use the following command:
27 | ```bash
28 | APIFY_TOKEN=your_token npm run test:integration
29 | ```
30 | 


--------------------------------------------------------------------------------
/src/utils/html-to-md.ts:
--------------------------------------------------------------------------------
 1 | import TurndownService from 'turndown';
 2 | 
 3 | const turndown = new TurndownService();
 4 | 
 5 | // Tags dropped entirely from the Markdown output, listed in registration order.
 6 | const REMOVED_TAGS = [
 7 |     // Non-visible elements
 8 |     'script', 'style', 'noscript',
 9 |     // Multimedia elements
10 |     'svg', 'img', 'figure', 'video', 'audio', 'picture',
11 |     // Interactive elements
12 |     'canvas', 'button', 'select', 'input',
13 |     // Embedded content
14 |     'iframe', 'embed', 'object',
15 |     // Navigation and footer elements
16 |     'aside', 'nav', 'footer',
17 | ] as const;
18 | 
19 | for (const tag of REMOVED_TAGS) {
20 |     turndown.remove(tag);
21 | }
22 | 
23 | /**
24 |  * Converts an HTML string to Markdown using the shared Turndown instance of this module.
25 |  */
26 | export function htmlToMarkdown(html: string): string {
27 |     const markdown = turndown.turndown(html);
28 |     return markdown;
29 | }
30 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Apify
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/actor/utils.ts:
--------------------------------------------------------------------------------
 1 | import { Actor } from 'apify';
 2 | 
 3 | import type { ActorRunData } from './types.js';
 4 | 
 5 | /**
 6 |  * Collects metadata about the current Actor run from environment variables.
 7 |  *
 8 |  * @returns The run data when `Actor.isAtHome()` is true, otherwise `null`.
 9 |  */
10 | export function getActorRunData(): ActorRunData | null {
11 |     if (!Actor.isAtHome()) return null;
12 | 
13 |     const { env } = process;
14 |     return {
15 |         id: env.ACTOR_RUN_ID,
16 |         actId: env.ACTOR_ID,
17 |         userId: env.APIFY_USER_ID,
18 |         startedAt: env.ACTOR_STARTED_AT,
19 |         finishedAt: null,
20 |         status: 'RUNNING',
21 |         meta: {
22 |             origin: env.APIFY_META_ORIGIN,
23 |         },
24 |         options: {
25 |             build: env.ACTOR_BUILD_NUMBER,
26 |             memoryMbytes: env.ACTOR_MEMORY_MBYTES,
27 |         },
28 |         buildId: env.ACTOR_BUILD_ID,
29 |         defaultKeyValueStoreId: env.ACTOR_DEFAULT_KEY_VALUE_STORE_ID,
30 |         defaultDatasetId: env.ACTOR_DEFAULT_DATASET_ID,
31 |         defaultRequestQueueId: env.ACTOR_DEFAULT_REQUEST_QUEUE_ID,
32 |         buildNumber: env.ACTOR_BUILD_NUMBER,
33 |         containerUrl: env.ACTOR_WEB_SERVER_URL,
34 |         standbyUrl: env.ACTOR_STANDBY_URL,
35 |     };
36 | }
37 | 


--------------------------------------------------------------------------------
/src/utils/mcp-clients.ts:
--------------------------------------------------------------------------------
 1 | import type { InitializeRequest } from '@modelcontextprotocol/sdk/types.js';
 2 | import { mcpClients } from 'mcp-client-capabilities';
 3 | 
 4 | /**
 5 |  * Determines whether the MCP client is known to support dynamic tools, i.e. its entry in
 6 |  * `mcp-client-capabilities` declares `tools.listChanged`.
 7 |  *
 8 |  * The entry is trusted only when its protocol version strictly equals the version sent in the
 9 |  * InitializeRequest; on any mismatch the recorded capabilities may not apply, so `false` is returned.
10 |  *
11 |  * @param initializeRequestData - The client's InitializeRequest, if available.
12 |  * @returns `true` only for a known client with a matching protocol version and `tools.listChanged === true`.
13 |  */
14 | export function doesMcpClientSupportDynamicTools(initializeRequestData?: InitializeRequest): boolean {
15 |     const params = initializeRequestData?.params;
16 |     const registryEntry = mcpClients[params?.clientInfo?.name || ''];
17 |     if (!registryEntry) return false;
18 | 
19 |     // Strict equality on purpose: a differing version means the registry data may be stale.
20 |     const versionsMatch = registryEntry.protocolVersion === params?.protocolVersion;
21 |     return versionsMatch && registryEntry.tools?.listChanged === true;
22 | }
23 | 


--------------------------------------------------------------------------------
/src/utils/userid-cache.ts:
--------------------------------------------------------------------------------
 1 | import { createHash } from 'node:crypto';
 2 | 
 3 | import type { ApifyClient } from '../apify-client.js';
 4 | import { USER_CACHE_MAX_SIZE, USER_CACHE_TTL_SECS } from '../const.js';
 5 | import { TTLLRUCache } from './ttl-lru.js';
 6 | 
 7 | // LRU cache with TTL that maps a hashed token to its user ID (only the ID string is stored)
 8 | const userIdCache = new TTLLRUCache<string>(USER_CACHE_MAX_SIZE, USER_CACHE_TTL_SECS);
 9 | 
10 | /**
11 |  * Resolves the user ID for a token, consulting the in-memory cache first.
12 |  * The cache key is the SHA-256 hex digest of the token, so raw tokens are never stored.
13 |  *
14 |  * @param token - Token to resolve; used only to derive the cache key.
15 |  * @param apifyClient - Client used to fetch the `me` user on a cache miss.
16 |  * @returns The user ID, or `null` when the lookup fails or yields no ID.
17 |  */
18 | export async function getUserIdFromTokenCached(
19 |     token: string,
20 |     apifyClient: ApifyClient,
21 | ): Promise<string | null> {
22 |     const cacheKey = createHash('sha256').update(token).digest('hex');
23 | 
24 |     const cachedUserId = userIdCache.get(cacheKey);
25 |     if (cachedUserId) return cachedUserId;
26 | 
27 |     try {
28 |         const me = await apifyClient.user('me').get();
29 |         const userId = me?.id;
30 |         if (!userId) return null;
31 | 
32 |         userIdCache.set(cacheKey, userId);
33 |         return userId;
34 |     } catch {
35 |         // Any failure while fetching or caching is reported to the caller as "no user".
36 |         return null;
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/.github/workflows/check.yaml:
--------------------------------------------------------------------------------
 1 | # This workflow runs for every pull request to lint and test the proposed changes.
 2 | 
 3 | name: Check
 4 | 
 5 | on:
 6 |     pull_request:
 7 | 
 8 |     # Push to master will trigger code checks
 9 |     push:
10 |         branches:
11 |             - master
12 |         tags-ignore:
13 |             - "**" # Ignore all tags to prevent duplicate builds when tags are pushed.
14 | 
15 | jobs:
16 |     lint_and_test:
17 |         name: Code checks
18 |         runs-on: ubuntu-latest
19 | 
20 |         steps:
21 |             -   uses: actions/checkout@v4
22 |             -   name: Use Node.js
23 |                 uses: actions/setup-node@v6
24 |                 with:
25 |                     node-version-file: '.nvmrc'
26 |                     cache: 'npm'
27 |                     cache-dependency-path: 'package-lock.json'
28 |             -   name: Install Dependencies
29 |                 run: npm ci --force
30 | 
31 |             -   name: Lint
32 |                 run: npm run lint
33 | 
34 |             -   name: Build
35 |                 run: npm run build
36 | 
37 |             -   name: Test
38 |                 run: npm run test
39 | 
40 |             -   name: Type checks
41 |                 run: npm run type-check
42 | 


--------------------------------------------------------------------------------
/src/mcp/proxy.ts:
--------------------------------------------------------------------------------
 1 | import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
 2 | 
 3 | import { fixedAjvCompile } from '../tools/utils.js';
 4 | import type { ActorMcpTool, ToolEntry } from '../types.js';
 5 | import { ajv } from '../utils/ajv.js';
 6 | import { getMCPServerID, getProxyMCPServerToolName } from './utils.js';
 7 | 
 8 | /**
 9 |  * Lists the tools exposed by an MCP server and wraps each one as an `actor-mcp` tool entry.
10 |  *
11 |  * @param actorID - Actor ID stored on every returned entry.
12 |  * @param client - MCP client whose `listTools()` is called.
13 |  * @param serverUrl - URL of the MCP server; used to derive the server ID and the proxied tool names.
14 |  * @returns One entry per listed tool, each carrying a validator compiled from the tool's input schema.
15 |  */
16 | export async function getMCPServerTools(
17 |     actorID: string,
18 |     client: Client,
19 |     serverUrl: string,
20 | ): Promise<ToolEntry[]> {
21 |     const { tools } = await client.listTools();
22 | 
23 |     return tools.map((tool): ActorMcpTool => ({
24 |         type: 'actor-mcp',
25 |         actorId: actorID,
26 |         serverId: getMCPServerID(serverUrl),
27 |         serverUrl,
28 |         originToolName: tool.name,
29 | 
30 |         name: getProxyMCPServerToolName(serverUrl, tool.name),
31 |         description: tool.description || '',
32 |         inputSchema: tool.inputSchema,
33 |         ajvValidate: fixedAjvCompile(ajv, tool.inputSchema),
34 |     }));
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/unit/tools.actor.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest';
 2 | 
 3 | import { actorNameToToolName } from '../../src/tools/utils.js';
 4 | 
 5 | describe('actors', () => {
 6 |     describe('actorNameToToolName', () => {
 7 |         it('should replace slashes and dots with dash notation', () => {
 8 |             const slashed = actorNameToToolName('apify/web-scraper');
 9 |             const dotted = actorNameToToolName('my.actor.name');
10 | 
11 |             expect(slashed).toBe('apify-slash-web-scraper');
12 |             expect(dotted).toBe('my-dot-actor-dot-name');
13 |         });
14 | 
15 |         it('should handle empty strings', () => {
16 |             const emptyName = '';
17 |             expect(actorNameToToolName(emptyName)).toBe(emptyName);
18 |         });
19 | 
20 |         it('should handle strings without slashes or dots', () => {
21 |             const plainName = 'actorname';
22 |             expect(actorNameToToolName(plainName)).toBe(plainName);
23 |         });
24 | 
25 |         it('should handle strings with multiple slashes and dots', () => {
26 |             const toolName = actorNameToToolName('actor/name.with/multiple.parts');
27 |             expect(toolName).toBe('actor-slash-name-dot-with-slash-multiple-dot-parts');
28 |         });
29 | 
30 |         it('should handle tool names longer than 64 characters', () => {
31 |             // 70 characters go in; only the first 64 are expected back.
32 |             const limit = 64;
33 |             const overlong = 'a'.repeat(limit + 6);
34 |             expect(actorNameToToolName(overlong)).toBe('a'.repeat(limit));
35 |         });
36 |     });
37 | });
38 | 


--------------------------------------------------------------------------------
/src/state.ts:
--------------------------------------------------------------------------------
 1 | import {
 2 |     ACTOR_CACHE_MAX_SIZE,
 3 |     ACTOR_CACHE_TTL_SECS,
 4 |     APIFY_DOCS_CACHE_MAX_SIZE,
 5 |     APIFY_DOCS_CACHE_TTL_SECS,
 6 |     GET_HTML_SKELETON_CACHE_MAX_SIZE,
 7 |     GET_HTML_SKELETON_CACHE_TTL_SECS,
 8 |     MCP_SERVER_CACHE_MAX_SIZE,
 9 |     MCP_SERVER_CACHE_TTL_SECS,
10 | } from './const.js';
11 | import type { ActorDefinitionWithInfo, ApifyDocsSearchResult } from './types.js';
12 | import { TTLLRUCache } from './utils/ttl-lru.js';
13 | 
14 | /** Cache of `ActorDefinitionWithInfo` values. */
15 | export const actorDefinitionPrunedCache = new TTLLRUCache<ActorDefinitionWithInfo>(
16 |     ACTOR_CACHE_MAX_SIZE,
17 |     ACTOR_CACHE_TTL_SECS,
18 | );
19 | 
20 | /** Cache of Apify docs search result lists. */
21 | export const searchApifyDocsCache = new TTLLRUCache<ApifyDocsSearchResult[]>(
22 |     APIFY_DOCS_CACHE_MAX_SIZE,
23 |     APIFY_DOCS_CACHE_TTL_SECS,
24 | );
25 | 
26 | /** Stores processed Markdown content; uses the same size and TTL limits as the docs search cache. */
27 | export const fetchApifyDocsCache = new TTLLRUCache<string>(
28 |     APIFY_DOCS_CACHE_MAX_SIZE,
29 |     APIFY_DOCS_CACHE_TTL_SECS,
30 | );
31 | 
32 | /** Stores HTML content per URL so we can paginate the tool output */
33 | export const getHtmlSkeletonCache = new TTLLRUCache<string>(
34 |     GET_HTML_SKELETON_CACHE_MAX_SIZE,
35 |     GET_HTML_SKELETON_CACHE_TTL_SECS,
36 | );
37 | 
38 | /**
39 |  * Stores MCP server resolution per actor:
40 |  * - false: not an MCP server
41 |  * - string: MCP server URL
42 |  *
43 |  * NOTE(review): the value type is `boolean | string`, so `true` is also accepted even though
44 |  * no meaning is documented for it - confirm whether `false | string` was intended.
45 |  */
46 | export const mcpServerCache = new TTLLRUCache<boolean | string>(
47 |     MCP_SERVER_CACHE_MAX_SIZE,
48 |     MCP_SERVER_CACHE_TTL_SECS,
49 | );
50 | 


--------------------------------------------------------------------------------
/src/utils/tool-status.ts:
--------------------------------------------------------------------------------
 1 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
 2 | 
 3 | import { TOOL_STATUS } from '../const.js';
 4 | import type { ToolStatus } from '../types.js';
 5 | import { getHttpStatusCode } from './logging.js';
 6 | 
 7 | /**
 8 |  * Maps an error raised during a tool call to the ToolStatus recorded for that call.
 9 |  *
10 |  * The checks run in this order and the first match wins:
11 |  * 1. `isAborted` is true                   → TOOL_STATUS.ABORTED
12 |  * 2. HTTP status code from 400 up to 499   → TOOL_STATUS.SOFT_FAIL (user/client error)
13 |  * 3. McpError with ErrorCode.InvalidParams → TOOL_STATUS.SOFT_FAIL (user/client error)
14 |  * 4. anything else, e.g. HTTP 5xx          → TOOL_STATUS.FAILED
15 |  *
16 |  * @param error - The caught value, of any type.
17 |  * @param isAborted - Whether the request was aborted; overrides whatever `error` holds.
18 |  * @returns The status for the first rule above that applies.
19 |  */
20 | export function getToolStatusFromError(error: unknown, isAborted: boolean): ToolStatus {
21 |     if (isAborted) return TOOL_STATUS.ABORTED;
22 | 
23 |     const httpStatus = getHttpStatusCode(error);
24 |     const isHttpClientError = httpStatus !== undefined && httpStatus >= 400 && httpStatus < 500;
25 |     if (isHttpClientError) return TOOL_STATUS.SOFT_FAIL;
26 | 
27 |     const isInvalidParams = error instanceof McpError && error.code === ErrorCode.InvalidParams;
28 |     return isInvalidParams ? TOOL_STATUS.SOFT_FAIL : TOOL_STATUS.FAILED;
29 | }
30 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig is a file format and collection of text editor plugins
 2 | # for maintaining consistent coding styles between different editors and IDEs
 3 | # See https://editorconfig.org for more information
 4 | 
 5 | root = true
 6 | 
 7 | # Apply to all files
 8 | [*]
 9 | indent_style = space
10 | indent_size = 4
11 | charset = utf-8
12 | trim_trailing_whitespace = true
13 | insert_final_newline = true
14 | end_of_line = lf
15 | # Maximum line length (160 characters)
16 | # Note: editorconfig-tools is unable to ignore long strings or URLs, so this is informational
17 | # ESLint will enforce this limit with its max-len rule
18 | max_line_length = 160
19 | 
20 | # IntelliJ IDEA / WebStorm specific settings
21 | # These settings configure code formatting behavior in JetBrains IDEs
22 | # They ensure consistent formatting when using IDE auto-format features
23 | #   - Adds spaces within TypeScript import braces: import { a, b } instead of import {a,b}
24 | ij_typescript_spaces_within_imports = true
25 | #   - Adds spaces within JavaScript import braces: import { a, b } instead of import {a,b}
26 | ij_javascript_spaces_within_imports = true
27 | #   - Adds spaces within TypeScript union types: string | number instead of string|number
28 | ij_typescript_spaces_within_union_types = true
29 | 
30 | # YAML files use 2-space indentation (no spaces inside the braces: they would become part of the glob)
31 | [*.{yaml,yml}]
32 | indent_size = 2
33 | 


--------------------------------------------------------------------------------
/src/index-internals.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  This file provides essential internal functions for Apify MCP servers, serving as an internal library.
 3 | */
 4 | 
 5 | import { ApifyClient } from './apify-client.js';
 6 | import { defaults, HelperTools } from './const.js';
 7 | import { processParamsGetTools } from './mcp/utils.js';
 8 | import { addTool } from './tools/helpers.js';
 9 | import { defaultTools, getActorsAsTools, toolCategories,
10 |     toolCategoriesEnabledByDefault, unauthEnabledToolCategories, unauthEnabledTools } from './tools/index.js';
11 | import { actorNameToToolName } from './tools/utils.js';
12 | import type { ToolCategory } from './types.js';
13 | import { parseCommaSeparatedList, parseQueryParamList } from './utils/generic.js';
14 | import { getExpectedToolNamesByCategories, getToolPublicFieldOnly } from './utils/tools.js';
15 | import { TTLLRUCache } from './utils/ttl-lru.js';
16 | 
17 | export {
18 |     ApifyClient,
19 |     getExpectedToolNamesByCategories,
20 |     TTLLRUCache,
21 |     actorNameToToolName,
22 |     HelperTools,
23 |     defaults,
24 |     defaultTools,
25 |     addTool,
26 |     toolCategories,
27 |     toolCategoriesEnabledByDefault,
28 |     type ToolCategory,
29 |     processParamsGetTools,
30 |     getActorsAsTools,
31 |     getToolPublicFieldOnly,
32 |     unauthEnabledToolCategories,
33 |     unauthEnabledTools,
34 |     parseCommaSeparatedList,
35 |     parseQueryParamList,
36 | };
37 | 


--------------------------------------------------------------------------------
/res/INDEX.md:
--------------------------------------------------------------------------------
 1 | # Resources Directory Index
 2 | 
 3 | This directory contains useful documents and insights about the repository architecture, design decisions, and implementation details that don't belong in code comments or JSDoc.
 4 | 
 5 | ## Files
 6 | 
 7 | ### [ALGOLIA.md](./ALGOLIA.md)
 8 | Technical analysis of Algolia search API responses for each documentation source.
 9 | - Data structure overview for each doc source (apify, crawlee-js, crawlee-py)
10 | - Field availability patterns (content, hierarchy, anchors)
11 | - Example response payloads
12 | - Recommendations for response processing logic
13 | - **Use case**: Understand what data is actually returned by Algolia to inform simplification decisions
14 | 
15 | ---
16 | 
17 | ## Purpose
18 | 
19 | Resources in this directory serve as:
20 | - **Technical references** for complex subsystems (e.g., Algolia integration)
21 | - **Decision documentation** explaining why certain approaches were chosen
22 | - **Data analysis** for optimization and refactoring efforts
23 | - **Integration guides** for external services and APIs
24 | 
25 | ## Guidelines
26 | 
27 | - Keep documents **short and technical** - avoid duplicating code logic
28 | - Focus on **insights and patterns** rather than implementation details
29 | - Use **tables, examples, and structured data** for clarity
30 | - Link to relevant source files when explaining code flow
31 | - Update when making significant changes to documented systems
32 | 


--------------------------------------------------------------------------------
/tests/integration/actor.server-sse.test.ts:
--------------------------------------------------------------------------------
 1 | import type { Server as HttpServer } from 'node:http';
 2 | 
 3 | import type { Express } from 'express';
 4 | 
 5 | import log from '@apify/log';
 6 | 
 7 | import { createExpressApp } from '../../src/actor/server.js';
 8 | import { createMcpSseClient } from '../helpers.js';
 9 | import { createIntegrationTestsSuite } from './suite.js';
10 | import { getAvailablePort } from './utils/port.js';
11 | 
12 | // State shared between the suite hooks; everything is assigned while the test server starts.
13 | let app: Express;
14 | let httpServer: HttpServer;
15 | let httpServerPort: number;
16 | let httpServerHost: string;
17 | let mcpUrl: string;
18 | 
19 | /** Picks an available port, builds the Express app for it and resolves once the server is listening. */
20 | async function startTestServer(): Promise<void> {
21 |     httpServerPort = await getAvailablePort();
22 |     httpServerHost = `http://localhost:${httpServerPort}`;
23 |     mcpUrl = `${httpServerHost}/sse`;
24 |     app = createExpressApp(httpServerHost);
25 | 
26 |     await new Promise<void>((resolve) => {
27 |         httpServer = app.listen(httpServerPort, () => resolve());
28 |     });
29 | }
30 | 
31 | /** Resolves once the test server has been closed. */
32 | async function stopTestServer(): Promise<void> {
33 |     await new Promise<void>((resolve) => {
34 |         httpServer.close(() => resolve());
35 |     });
36 | }
37 | 
38 | createIntegrationTestsSuite({
39 |     suiteName: 'Apify MCP Server SSE',
40 |     transport: 'sse',
41 |     createClientFn: async (options) => await createMcpSseClient(mcpUrl, options),
42 |     beforeAllFn: async () => {
43 |         log.setLevel(log.LEVELS.OFF);
44 |         await startTestServer();
45 |     },
46 |     afterAllFn: async () => {
47 |         await stopTestServer();
48 |     },
49 | });
50 | 


--------------------------------------------------------------------------------
/tests/integration/actor.server-streamable.test.ts:
--------------------------------------------------------------------------------
 1 | import type { Server as HttpServer } from 'node:http';
 2 | 
 3 | import type { Express } from 'express';
 4 | 
 5 | import log from '@apify/log';
 6 | 
 7 | import { createExpressApp } from '../../src/actor/server.js';
 8 | import { createMcpStreamableClient } from '../helpers.js';
 9 | import { createIntegrationTestsSuite } from './suite.js';
10 | import { getAvailablePort } from './utils/port.js';
11 | 
12 | // State shared between the suite hooks; everything is assigned while the test server starts.
13 | let app: Express;
14 | let httpServer: HttpServer;
15 | let httpServerPort: number;
16 | let httpServerHost: string;
17 | let mcpUrl: string;
18 | 
19 | /** Picks an available port, builds the Express app for it and resolves once the server is listening. */
20 | async function startTestServer(): Promise<void> {
21 |     httpServerPort = await getAvailablePort();
22 |     httpServerHost = `http://localhost:${httpServerPort}`;
23 |     mcpUrl = `${httpServerHost}/mcp`;
24 |     app = createExpressApp(httpServerHost);
25 | 
26 |     await new Promise<void>((resolve) => {
27 |         httpServer = app.listen(httpServerPort, () => resolve());
28 |     });
29 | }
30 | 
31 | /** Resolves once the test server has been closed. */
32 | async function stopTestServer(): Promise<void> {
33 |     await new Promise<void>((resolve) => {
34 |         httpServer.close(() => resolve());
35 |     });
36 | }
37 | 
38 | createIntegrationTestsSuite({
39 |     suiteName: 'Apify MCP Server Streamable HTTP',
40 |     transport: 'streamable-http',
41 |     createClientFn: async (options) => await createMcpStreamableClient(mcpUrl, options),
42 |     beforeAllFn: async () => {
43 |         log.setLevel(log.LEVELS.OFF);
44 |         await startTestServer();
45 |     },
46 |     afterAllFn: async () => {
47 |         await stopTestServer();
48 |     },
49 | });
50 | 


--------------------------------------------------------------------------------
/tests/unit/utils.tool-status.test.ts:
--------------------------------------------------------------------------------
 1 | import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
 2 | import { describe, expect, it } from 'vitest';
 3 | 
 4 | import { TOOL_STATUS } from '../../src/const.js';
 5 | import { getToolStatusFromError } from '../../src/utils/tool-status.js';
 6 | 
 7 | describe('getToolStatusFromError', () => {
 8 |     /** Builds an Error carrying the given HTTP `statusCode`. */
 9 |     const httpError = (message: string, statusCode: number) => Object.assign(new Error(message), { statusCode });
10 | 
11 |     it('returns aborted when isAborted is true', () => {
12 |         expect(getToolStatusFromError(new Error('any'), true)).toBe(TOOL_STATUS.ABORTED);
13 |     });
14 | 
15 |     it('classifies HTTP 4xx errors as soft_fail', () => {
16 |         expect(getToolStatusFromError(httpError('Bad Request', 400), false)).toBe(TOOL_STATUS.SOFT_FAIL);
17 |     });
18 | 
19 |     it('classifies HTTP 5xx errors as failed', () => {
20 |         expect(getToolStatusFromError(httpError('Internal Error', 500), false)).toBe(TOOL_STATUS.FAILED);
21 |     });
22 | 
23 |     it('classifies McpError InvalidParams as soft_fail', () => {
24 |         const invalidParams = new McpError(ErrorCode.InvalidParams, 'invalid', undefined);
25 |         expect(getToolStatusFromError(invalidParams, false)).toBe(TOOL_STATUS.SOFT_FAIL);
26 |     });
27 | 
28 |     it('classifies unknown errors without status code as failed', () => {
29 |         expect(getToolStatusFromError(new Error('unknown'), false)).toBe(TOOL_STATUS.FAILED);
30 |     });
31 | });
32 | 


--------------------------------------------------------------------------------
/src/utils/ajv.ts:
--------------------------------------------------------------------------------
 1 | import type { ValidateFunction } from 'ajv';
 2 | import Ajv from 'ajv';
 3 | 
 4 | export const ajv = new Ajv({ coerceTypes: 'array', strict: false }); // Shared instance; compileSchema() compiles with it.
 5 | 
 6 | /**
 7 |  * Returns a shallow copy of a JSON schema that is safe to compile with AJV.
 8 |  *
 9 |  * Works around two quirks of z.toJSONSchema() in Zod 4.x:
10 |  * 1. the top-level $schema reference, which can cause issues when compiling with AJV, is dropped;
11 |  * 2. top-level fields that declare a default value are removed from `required`.
12 |  *
13 |  * Only the top level is touched: nested schemas are passed through as-is and the input is not mutated.
14 |  */
15 | function cleanJsonSchema(schema: Record<string, unknown>): Record<string, unknown> {
16 |     const hasDefault = (field: unknown): boolean => typeof field === 'object' && field !== null && 'default' in field;
17 | 
18 |     const result: Record<string, unknown> = { ...schema };
19 |     delete result.$schema;
20 | 
21 |     const { required, properties } = result;
22 |     if (!Array.isArray(required) || typeof properties !== 'object' || properties === null) {
23 |         return result;
24 |     }
25 | 
26 |     // A field stays required only when its schema declares no default.
27 |     const fieldSchemas = properties as Record<string, unknown>;
28 |     result.required = (required as string[]).filter((fieldName) => !hasDefault(fieldSchemas[fieldName]));
29 | 
30 |     return result;
31 | }
32 | 
33 | /**
34 |  * Compiles a JSON schema into an AJV validator using the shared `ajv` instance.
35 |  * The schema first goes through cleanJsonSchema(), so z.toJSONSchema() output can be passed in directly.
36 |  */
37 | export function compileSchema(schema: Record<string, unknown>): ValidateFunction {
38 |     const compatibleSchema = cleanJsonSchema(schema);
39 |     return ajv.compile(compatibleSchema);
40 | }
41 | 


--------------------------------------------------------------------------------
/src/utils/mcp.ts:
--------------------------------------------------------------------------------
 1 | import type { ToolStatus } from '../types.js';
 2 | 
 3 | /**
 4 |  * Helper to build a response for MCP from an array of text strings.
 5 |  * @param options - Object containing response configuration
 6 |  * @param options.texts - Array of text strings; each becomes one `{ type: 'text', text }` content item
 7 |  * @param options.isError - Optional flag to mark the response as an error (default: false).
 8 |  *                          This must remain MCP compliant: true for any tool-level error.
 9 |  * @param options.toolStatus - Optional internal tool status used for telemetry. When provided (truthy),
10 |  *                             it is attached as `internalToolStatus` so the server can read it
11 |  *                             and strip it before sending the response to the MCP client.
12 |  * @param options.structuredContent - Optional structured content of unknown type; attached whenever it is
13 |  *                                    not `undefined` (so `null` is kept)
14 |  * @returns Object with `content` and `isError`, plus `internalToolStatus` / `structuredContent` when set
15 |  */
16 | export function buildMCPResponse(options: {
17 |     texts: string[];
18 |     isError?: boolean;
19 |     toolStatus?: ToolStatus;
20 |     structuredContent?: unknown;
21 | }) {
22 |     const { texts, isError = false, toolStatus, structuredContent } = options;
23 | 
24 |     const response: {
25 |         content: { type: 'text'; text: string }[];
26 |         isError: boolean;
27 |         internalToolStatus?: ToolStatus;
28 |         structuredContent?: unknown;
29 |     } = {
30 |         content: texts.map((text) => ({ type: 'text', text })),
31 |         isError,
32 |     };
33 | 
34 |     // Attach internal tool status for telemetry; server will read and strip it
35 |     if (toolStatus) {
36 |         response.internalToolStatus = toolStatus;
37 |     }
38 | 
39 |     // `undefined` means "not provided"; any other value, including null, is passed through
40 |     if (structuredContent !== undefined) {
41 |         response.structuredContent = structuredContent;
42 |     }
43 | 
44 |     return response;
45 | }
46 | 


--------------------------------------------------------------------------------
/.github/workflows/evaluations.yaml:
--------------------------------------------------------------------------------
 1 | # This workflow runs MCP tool calling evaluations on master branch merges
 2 | # It evaluates AI models' ability to correctly identify and call MCP tools.
 3 | 
 4 | name: MCP tool calling evaluations
 5 | 
 6 | on:
 7 |     # Run evaluations on PR merges to master or PRs with 'validated' label
 8 |     pull_request:
 9 |         types: [closed, labeled]
10 |         branches:
11 |             - 'master'
12 | 
13 | jobs:
14 |     evaluations:
15 |         name: MCP tool calling evaluations
16 |         runs-on: ubuntu-latest
17 |         # Run on PR merges to master or PRs with 'validated' label
18 |         if: |
19 |             (github.event.action == 'closed' && github.event.pull_request.merged == true) ||
20 |             (github.event.action == 'labeled' && github.event.label.name == 'validated')
21 | 
22 |         steps:
23 |             -   name: Checkout code
24 |                 uses: actions/checkout@v4
25 | 
26 |             -   name: Use Node.js
27 |                 uses: actions/setup-node@v6
28 |                 with:
29 |                     node-version-file: '.nvmrc'
30 |                     cache: 'npm'
31 |                     cache-dependency-path: 'package-lock.json'
32 | 
33 |             -   name: Install Node dependencies
34 |                 run: npm ci --force --include=dev
35 | 
36 |             -   name: Build project
37 |                 run: npm run build
38 | 
39 |             -   name: Run evaluations
40 |                 run: npm run evals:run
41 |                 env:
42 |                     GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
43 |                     PHOENIX_API_KEY: ${{ secrets.PHOENIX_API_KEY }}
44 |                     PHOENIX_BASE_URL: ${{ secrets.PHOENIX_BASE_URL }}
45 |                     OPENROUTER_BASE_URL: ${{ secrets.OPENROUTER_BASE_URL }}
46 |                     OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
47 | 


--------------------------------------------------------------------------------
/.actor/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Specify the base Docker image. You can read more about
 2 | # the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
 3 | # You can also use any other image from Docker Hub.
 4 | FROM apify/actor-node:24 AS builder
 5 | 
 6 | # Check preinstalled packages
 7 | RUN npm ls crawlee apify puppeteer playwright
 8 | 
 9 | # Copy just package.json and package-lock.json
10 | # to speed up the build using Docker layer cache.
11 | COPY package*.json ./
12 | 
13 | # Install all dependencies. Don't audit to speed up the installation.
14 | RUN npm install --include=dev --audit=false
15 | 
16 | # Next, copy the source files using the user set
17 | # in the base image.
18 | COPY . ./
19 | 
20 | # Build the project. Dependencies, including dev dependencies,
21 | # were already installed by the `npm install` step above.
22 | RUN npm run build
23 | 
24 | # Create final image
25 | FROM apify/actor-node:24
26 | 
27 | # Check preinstalled packages
28 | RUN npm ls crawlee apify puppeteer playwright
29 | 
30 | # Copy just package.json and package-lock.json
31 | # to speed up the build using Docker layer cache.
32 | COPY package*.json ./
33 | 
34 | # Install NPM packages, skip optional and development dependencies to
35 | # keep the image small. Avoid logging too much and print the dependency
36 | # tree for debugging
37 | RUN npm --quiet set progress=false \
38 |     && npm install --omit=dev --omit=optional \
39 |     && echo "Installed NPM packages:" \
40 |     && (npm list --omit=dev --all || true) \
41 |     && echo "Node.js version:" \
42 |     && node --version \
43 |     && echo "NPM version:" \
44 |     && npm --version \
45 |     && rm -r ~/.npm
46 | 
47 | # Copy built JS files from builder image
48 | COPY --from=builder /usr/src/app/dist ./dist
49 | 
50 | # Next, copy the remaining files and directories with the source code.
51 | # Since we do this after NPM install, quick build will be really fast
52 | # for most source file changes.
53 | COPY . ./
54 | 
55 | 
56 | # Run the image.
57 | CMD npm run start:prod --silent
58 | 


--------------------------------------------------------------------------------
/tests/unit/utils.ttl-lru.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest';
 2 | 
 3 | import { TTLLRUCache } from '../../src/utils/ttl-lru.js';
 4 | 
 5 | /** Resolves after `ms` milliseconds, so a test can wait past (or stay short of) a TTL. */
 6 | const sleep = async (ms: number) => new Promise((r) => { setTimeout(r, ms); });
 7 | 
 8 | describe('TTLLRUCache', () => {
 9 |     it('should set and get values before TTL expires', () => {
10 |         const cache = new TTLLRUCache<string>(2, 2); // 2 seconds TTL
11 |         cache.set('a', 'valueA');
12 |         expect(cache.get('a')).toBe('valueA');
13 |     });
14 | 
15 |     it('should return null after TTL expires', async () => {
16 |         const cache = new TTLLRUCache<string>(2, 1); // 1 second TTL
17 |         cache.set('a', 'valueA');
18 |         await sleep(1100);
19 |         expect(cache.get('a')).toBeNull();
20 |     });
21 | 
22 |     it('should evict least recently used items when maxLength is exceeded', () => {
23 |         const cache = new TTLLRUCache<string>(2, 10); // Large TTL
24 |         // The third insertion exceeds the maximum of two entries, so 'a' should be evicted.
25 |         for (const key of ['a', 'b', 'c']) {
26 |             cache.set(key, `value${key.toUpperCase()}`);
27 |         }
28 |         expect(cache.get('a')).toBeNull();
29 |         expect(cache.get('b')).toBe('valueB');
30 |         expect(cache.get('c')).toBe('valueC');
31 |     });
32 | 
33 |     it('should update value and TTL on set for existing key', async () => {
34 |         const cache = new TTLLRUCache<string>(2, 1); // 1 second TTL
35 |         cache.set('a', 'valueA');
36 |         await sleep(700);
37 |         cache.set('a', 'valueA2'); // Reset TTL
38 |         await sleep(700);
39 |         expect(cache.get('a')).toBe('valueA2');
40 |     });
41 | 
42 |     it('should remove expired entry on get', async () => {
43 |         const cache = new TTLLRUCache<string>(2, 1); // 1 second TTL
44 |         cache.set('a', 'valueA');
45 |         await sleep(1100);
46 |         const firstRead = cache.get('a');
47 |         // Should not throw if called again
48 |         const secondRead = cache.get('a');
49 |         expect(firstRead).toBeNull();
50 |         expect(secondRead).toBeNull();
51 |     });
52 | });
53 | 


--------------------------------------------------------------------------------
/src/prompts/latest-news-on-topic.ts:
--------------------------------------------------------------------------------
 1 | import type { PromptArgument } from '@modelcontextprotocol/sdk/types.js';
 2 | 
 3 | import { fixedAjvCompile } from '../tools/utils.js';
 4 | import type { PromptBase } from '../types.js';
 5 | import { ajv } from '../utils/ajv.js';
 6 | 
 7 | /**
 8 |  * Arguments accepted by the prompt, in the MCP `PromptArgument` format.
 9 |  */
10 | const args: PromptArgument[] = [
11 |     {
12 |         name: 'topic',
13 |         description: 'The topic to retrieve the latest news on.',
14 |         required: true,
15 |     },
16 |     {
17 |         name: 'timespan',
18 |         description: 'The timespan for which to retrieve news articles. Defaults to "7 days". For example "1 day", "3 days", "7 days", "1 month", etc.',
19 |         required: false,
20 |     },
21 | ];
22 | 
23 | /** One string-typed schema property per argument, carrying the argument's description. */
24 | const argProperties = Object.fromEntries(
25 |     args.map((arg) => [arg.name, { type: 'string', description: arg.description }]),
26 | );
27 | 
28 | /** Names of the arguments flagged as required. */
29 | const requiredArgNames = args.filter((arg) => arg.required).map((arg) => arg.name);
30 | 
31 | /**
32 |  * AJV validator for the prompt arguments, derived from `args` above.
33 |  */
34 | const argsSchema = fixedAjvCompile(ajv, {
35 |     type: 'object',
36 |     properties: argProperties,
37 |     required: requiredArgNames,
38 | });
39 | 
40 | /**
41 |  * The prompt definition itself. `render` falls back to "7 days" when `timespan` is missing or blank
42 |  * and appends the current UTC date to the generated text.
43 |  */
44 | export const latestNewsOnTopicPrompt: PromptBase = {
45 |     name: 'GetLatestNewsOnTopic',
46 |     description: 'This prompt retrieves the latest news articles on a selected topic.',
47 |     arguments: args,
48 |     ajvValidate: argsSchema,
49 |     render: (data) => {
50 |         const [todayUtc] = new Date().toISOString().split('T');
51 |         const requested = data.timespan;
52 |         const timespan = requested && requested.trim() !== '' ? requested : '7 days';
53 |         return `I want you to use the RAG web browser to search the web for the latest news on the "${data.topic}" topic. Retrieve news from the last ${timespan}. The RAG web browser accepts a query parameter that supports all Google input, including filters and flags—be sure to use them to accomplish my goal. Today is ${todayUtc} UTC.`;
54 |     },
55 | };
56 | 


--------------------------------------------------------------------------------
/.github/workflows/pre_release.yaml:
--------------------------------------------------------------------------------
 1 | name: Create a pre-release
 2 | 
 3 | on:
 4 |     # Fires on PR label/update events; the "beta" label requirement is enforced by the job-level `if` below
 5 |     pull_request:
 6 |         types: [labeled, synchronize, reopened]
 7 | 
 8 | concurrency:
 9 |     group: release
10 |     cancel-in-progress: false
11 | 
12 | jobs:
13 |     wait_for_checks:
14 |         # Run ONLY when PR has the "beta" label
15 |         if: contains(github.event.pull_request.labels.*.name, 'beta')
16 |         name: Wait for code checks to pass
17 |         runs-on: ubuntu-latest
18 |         steps:
19 |             -   name: Wait for existing checks or skip if none
20 |                 uses: lewagon/wait-on-check-action@v1.3.4
21 |                 with:
22 |                     ref: ${{ github.event.pull_request.head.sha }}
23 |                     repo-token: ${{ secrets.GITHUB_TOKEN }}
24 |                     check-regexp: (Code checks)
25 |                     wait-interval: 10
26 |                     running-workflow-name: 'Wait for code checks to pass'
27 |                     allowed-conclusions: success,neutral,skipped
28 |                 continue-on-error: false
29 |                 
30 |     push_pkg_pr_new:
31 |         needs: [ wait_for_checks ]
32 |         name: Push to pkg.pr.new
33 |         runs-on: ubuntu-latest
34 | 
35 |         steps:
36 |             -   name: Checkout repository
37 |                 uses: actions/checkout@v4
38 |                 with:
39 |                     ref: ${{ github.event.pull_request.head.ref }}
40 |                     repository: ${{ github.event.pull_request.head.repo.full_name }}
41 | 
42 |             -   name: Use Node.js
43 |                 uses: actions/setup-node@v6
44 |                 with:
45 |                     node-version-file: '.nvmrc'
46 |                     cache: 'npm'
47 |                     cache-dependency-path: 'package-lock.json'
48 | 
49 |             -   name: Install dependencies
50 |                 run: npm ci --force
51 |                 
52 |             -   name: Build
53 |                 run: npm run build
54 | 
55 |             -   run: npx -y pkg-pr-new publish
56 | 


--------------------------------------------------------------------------------
/src/utils/actor-details.ts:
--------------------------------------------------------------------------------
 1 | import type { Actor, Build } from 'apify-client';
 2 | 
 3 | import type { ApifyClient } from '../apify-client.js';
 4 | import { filterSchemaProperties, shortenProperties } from '../tools/utils.js';
 5 | import type { ActorInputSchema, StructuredActorCard } from '../types.js';
 6 | import { formatActorToActorCard, formatActorToStructuredCard } from './actor-card.js';
 7 | import { logHttpError } from './logging.js';
 8 | 
 9 | // Keep the type here since it is a self-contained module
10 | export type ActorDetailsResult = {
11 |     actorInfo: Actor; // result of `apifyClient.actor(actorName).get()`
12 |     buildInfo: Build; // the Actor's default build
13 |     actorCard: string; // output of `formatActorToActorCard(actorInfo)`
14 |     actorCardStructured: StructuredActorCard; // output of `formatActorToStructuredCard(actorInfo)`
15 |     inputSchema: ActorInputSchema; // the build's input schema after property filtering and shortening
16 |     readme: string; // the build's README, or 'No README provided.' when it has none
17 | };
18 | 
19 | /**
20 |  * Fetches an Actor and its default build, then derives the Actor card, input schema and README from them.
21 |  *
22 |  * @param apifyClient - Client used for both the Actor lookup and the default-build lookup.
23 |  * @param actorName - Passed to `apifyClient.actor()` to select the Actor.
24 |  * @returns The assembled details, or `null` when the Actor, the build or the build's `actorDefinition`
25 |  *          is missing, or when any step throws (the error is then logged through `logHttpError`).
26 |  */
27 | export async function fetchActorDetails(apifyClient: ApifyClient, actorName: string): Promise<ActorDetailsResult | null> {
28 |     try {
29 |         // Both lookups are started before either is awaited.
30 |         const actorLookup = apifyClient.actor(actorName).get();
31 |         const buildLookup = apifyClient.actor(actorName).defaultBuild().then(async (build) => build.get());
32 |         const [actorInfo, buildInfo]: [Actor | undefined, Build | undefined] = await Promise.all([actorLookup, buildLookup]);
33 | 
34 |         const actorDefinition = buildInfo?.actorDefinition;
35 |         if (!actorInfo || !buildInfo || !actorDefinition) return null;
36 | 
37 |         // When the build defines an input schema, `properties` is replaced on that same object (no copy is made).
38 |         const inputSchema = (actorDefinition.input || { type: 'object', properties: {} }) as ActorInputSchema;
39 |         inputSchema.properties = shortenProperties(filterSchemaProperties(inputSchema.properties));
40 | 
41 |         return {
42 |             actorInfo,
43 |             buildInfo,
44 |             actorCard: formatActorToActorCard(actorInfo),
45 |             actorCardStructured: formatActorToStructuredCard(actorInfo),
46 |             inputSchema,
47 |             readme: actorDefinition.readme || 'No README provided.',
48 |         };
49 |     } catch (error) {
50 |         logHttpError(error, `Failed to fetch actor details for '${actorName}'`, { actorName });
51 |         return null;
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "apify-mcp-evals"
 3 | version = "0.1.0"
 4 | description = "Python evaluations for Apify MCP Server using Arize Phoenix"
 5 | requires-python = ">=3.12"
 6 | dependencies = [
 7 |     "arize-phoenix>=12.5.0",
 8 |     "anthropic>=0.33.1",
 9 |     "openai>=1.0.0",
10 |     "pandas>=2.0.0",
11 |     "python-dotenv>=1.0.0",
12 |     "tqdm>=4.65.0",
13 | ]
14 | 
15 | [dependency-groups]
16 | dev = [
17 |     "mypy",
18 |     "ruff",
19 | ]
20 | 
21 | [build-system]
22 | requires = ["hatchling"]
23 | build-backend = "hatchling.build"
24 | 
25 | [tool.hatch.build.targets.wheel]
26 | packages = ["evals"]
27 | 
28 | [tool.ruff]
29 | line-length = 120
30 | include = ["*.py"]
31 | 
32 | [tool.ruff.lint]
33 | select = ["ALL"]
34 | ignore = [
35 |     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {filename}
36 |     "BLE001", # Do not catch blind exception
37 |     "C901",   # `{name}` is too complex
38 |     "COM812", # This rule may cause conflicts when used with the formatter
39 |     "D100",   # Missing docstring in public module
40 |     "D104",   # Missing docstring in public package
41 |     "D107",   # Missing docstring in `__init__`
42 |     "D203",   # One blank line required before class docstring
43 |     "D213",   # Multi-line docstring summary should start at the second line
44 |     "D413",   # Missing blank line after last section
45 |     "EM",     # flake8-errmsg
46 |     "G004",   # Logging statement uses f-string
47 |     "ISC001", # This rule may cause conflicts when used with the formatter
48 |     "FIX",    # flake8-fixme
49 |     "TRY003", # Avoid specifying long messages outside the exception class
50 | ]
51 | 
52 | [tool.ruff.format]
53 | quote-style = "single"
54 | indent-style = "space"
55 | 
56 | [tool.ruff.lint.per-file-ignores]
57 | "**/__init__.py" = [
58 |     "F401", # Unused imports
59 | ]
60 | 
61 | [tool.ruff.lint.flake8-quotes]
62 | docstring-quotes = "double"
63 | inline-quotes = "single"
64 | 
65 | [tool.ruff.lint.flake8-type-checking]
66 | runtime-evaluated-base-classes = [
67 |     "pydantic.BaseModel",
68 |     "pydantic_settings.BaseSettings",
69 | ]
70 | 
71 | [tool.ruff.lint.flake8-builtins]
72 | builtins-ignorelist = ["id"]
73 | 


--------------------------------------------------------------------------------
/src/utils/html.ts:
--------------------------------------------------------------------------------
 1 | import * as cheerio from 'cheerio';
 2 | 
// Minimal shape of an element node as read by stripHtml: its attribute map and tag name.
type CheerioElementLike = {
    attribs: Record<string, string>;
    tagName: string;
};

// Minimal shape of any DOM node as read by stripHtml: only the node `type` (compared against 'comment').
type NodeLike = {
    type: string;
};
11 | 
/**
 * Strips HTML and keeps only the structure.
 *
 * Removes styles, scripts, and other non-content elements.
 * Collapses whitespace and trims the result.
 * Keeps only href (on <a> only), src, alt, id, class, title, name, data-* attributes.
 * Removes HTML comments (including those placed directly under the document root)
 * and spaces between tags.
 * Removes base64 encoded images.
 *
 * @param html - Full HTML document or HTML fragment
 * @returns The stripped markup: the whole document when the input contains an `<html` tag
 *          (matched case-insensitively), otherwise only the parsed body content.
 *          Blank input yields an empty string.
 */
export function stripHtml(html: string): string {
    // Nothing to parse for blank input
    if (html.trim() === '') {
        return '';
    }

    const $ = cheerio.load(html);

    // Remove <style>, <script>, <noscript>, <iframe>, <svg>, <canvas>, <math> tags and their content.
    // Done first so no attribute scrubbing is spent on nodes that are discarded anyway.
    $('style, script, noscript, iframe, svg, canvas, math').remove();

    // Remove base64 encoded images
    $('img[src^="data:image/"]').remove();

    // Remove HTML comments. `$('*')` only matches elements, so comments that are direct children
    // of the document root (e.g. before <html>) must be collected from the root separately.
    const isComment = (_: number, node: unknown) => (node as NodeLike).type === 'comment';
    $.root().contents().filter(isComment).remove();
    $('*').contents().filter(isComment).remove();

    // Remove all attributes except href (only on a), src, alt, id, class, title, name, data-*
    const allowedAttrs = new Set(['href', 'src', 'alt', 'id', 'class', 'title', 'name']);
    $('*').each((_, element) => {
        const { attribs, tagName } = (element as CheerioElementLike);
        if (!attribs) return;
        for (const attr of Object.keys(attribs)) {
            const keep = attr === 'href'
                ? tagName === 'a'
                : allowedAttrs.has(attr) || attr.startsWith('data-');
            if (!keep) {
                $(element).removeAttr(attr);
            }
        }
    });

    // Fragments are wrapped in <html><head><body> by the parser; return only the body for them.
    // Tag names are case-insensitive in HTML, so `<HTML>` counts as a full document too.
    const isFullDocument = /<html/i.test(html);
    const result = isFullDocument ? $.html() : ($('body').html() ?? '');

    // Collapse multiple spaces into one, remove spaces between tags, and trim
    return result.replace(/\s+/g, ' ').replace(/>\s+</g, '><').trim();
}
61 | 


--------------------------------------------------------------------------------
/src/utils/logging.ts:
--------------------------------------------------------------------------------
 1 | import log from '@apify/log';
 2 | 
 3 | /**
 4 |  * Safely extract HTTP status code from errors.
 5 |  * Checks both `statusCode` and `code` properties for compatibility.
 6 |  */
 7 | export function getHttpStatusCode(error: unknown): number | undefined {
 8 |     if (typeof error !== 'object' || error === null) {
 9 |         return undefined;
10 |     }
11 | 
12 |     // Check for statusCode property (used by apify-client)
13 |     if ('statusCode' in error) {
14 |         const { statusCode } = (error as { statusCode?: unknown });
15 |         if (typeof statusCode === 'number' && statusCode >= 100 && statusCode < 600) {
16 |             return statusCode;
17 |         }
18 |     }
19 | 
20 |     // Check for code property (used by some error types)
21 |     if ('code' in error) {
22 |         const { code } = (error as { code?: unknown });
23 |         if (typeof code === 'number' && code >= 100 && code < 600) {
24 |             return code;
25 |         }
26 |     }
27 | 
28 |     return undefined;
29 | }
30 | 
/**
 * Logs an error at a level chosen from its HTTP status code, following the apify-core pattern.
 *
 * - status < 500 (API client errors): `log.softFail` with the error message only, no stack trace
 * - status >= 500 (API server errors): `log.exception` with an Error object
 * - no status code: `log.error` with the raw error attached
 *
 * @param error - The error object
 * @param message - The log message
 * @param data - Additional data to include in the log
 */
export function logHttpError<T extends object>(error: unknown, message: string, data?: T): void {
    const isErrorInstance = error instanceof Error;
    const statusCode = getHttpStatusCode(error);
    const errorMessage = isErrorInstance ? error.message : String(error);

    if (statusCode === undefined) {
        // No status code available - log as error
        log.error(message, { error, ...data });
        return;
    }

    if (statusCode < 500) {
        log.softFail(message, { error: errorMessage, statusCode, ...data });
        return;
    }

    // Wrap non-Error values so `log.exception` always receives an Error
    const errorObj = isErrorInstance ? error : new Error(String(error));
    log.exception(errorObj, message, { statusCode, ...data });
}
55 | 


--------------------------------------------------------------------------------
/src/utils/tools.ts:
--------------------------------------------------------------------------------
 1 | import { toolCategories } from '../tools/index.js';
 2 | import type { HelperTool, ToolBase, ToolCategory, ToolEntry } from '../types.js';
 3 | 
 4 | /**
 5 |  * Returns a public version of the tool containing only fields that should be exposed publicly.
 6 |  * Used for the tools list request.
 7 |  */
 8 | export function getToolPublicFieldOnly(tool: ToolBase) {
 9 |     return {
10 |         name: tool.name,
11 |         title: tool.title,
12 |         description: tool.description,
13 |         inputSchema: tool.inputSchema,
14 |         outputSchema: tool.outputSchema,
15 |         annotations: tool.annotations,
16 |         icons: tool.icons,
17 |         execution: tool.execution,
18 |     };
19 | }
20 | 
/**
 * Collects the tool entries registered in `toolCategories` for the given categories,
 * in the order the categories are listed. Categories without an entry contribute nothing.
 */
export function getExpectedToolsByCategories(categories: ToolCategory[]): ToolEntry[] {
    const collected: ToolEntry[] = [];
    for (const category of categories) {
        const toolsInCategory = toolCategories[category] || [];
        collected.push(...toolsInCategory);
    }
    return collected;
}
28 | 
/**
 * Lists the names of all tools in the given categories, as resolved by getExpectedToolsByCategories.
 */
export function getExpectedToolNamesByCategories(categories: ToolCategory[]): string[] {
    const names: string[] = [];
    for (const { name } of getExpectedToolsByCategories(categories)) {
        names.push(name);
    }
    return names;
}
35 | 
/**
 * Creates a deep copy of a tool entry, preserving functions like ajvValidate and call
 * while cloning all other properties to avoid shared state mutations.
 *
 * The copy is made through JSON serialization, so only JSON-representable data survives
 * in the cloned part. The `ajvValidate` function (and `call` for internal tools) is carried
 * over by reference.
 *
 * @param toolEntry - Tool entry to copy; it is not modified
 * @returns A new tool entry that shares only its functions with the original
 */
export function cloneToolEntry(toolEntry: ToolEntry): ToolEntry {
    // Store the original functions
    const originalAjvValidate = toolEntry.ajvValidate;
    const originalCall = toolEntry.type === 'internal' ? toolEntry.call : undefined;

    // Skip the two function fields of the entry itself. The replacer is invoked with `this` bound to
    // the object that holds the key, which lets us restrict the filter to the top level: nested data
    // that happens to use the same key (e.g. an input schema property named "call") must be kept.
    const serialized = JSON.stringify(toolEntry, function skipEntryFunctions(this: unknown, key: string, value: unknown) {
        if (this === toolEntry && (key === 'ajvValidate' || key === 'call')) return undefined;
        return value;
    });
    const cloned = JSON.parse(serialized) as ToolEntry;

    // Restore the original functions
    cloned.ajvValidate = originalAjvValidate;
    if (toolEntry.type === 'internal' && originalCall) {
        (cloned as HelperTool).call = originalCall;
    }

    return cloned;
}
59 | 


--------------------------------------------------------------------------------
/tests/unit/mcp.utils.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest';
 2 | 
 3 | import { parseInputParamsFromUrl } from '../../src/mcp/utils.js';
 4 | 
 5 | describe('parseInputParamsFromUrl', () => {
 6 |     it('should parse Actors from URL query params (as tools)', () => {
 7 |         const url = 'https://mcp.apify.com?token=123&actors=apify/web-scraper';
 8 |         const result = parseInputParamsFromUrl(url);
 9 |         expect(result.tools).toEqual(['apify/web-scraper']);
10 |         expect(result.actors).toBeUndefined();
11 |     });
12 | 
13 |     it('should parse multiple Actors from URL (as tools)', () => {
14 |         const url = 'https://mcp.apify.com?actors=apify/instagram-scraper,lukaskrivka/google-maps';
15 |         const result = parseInputParamsFromUrl(url);
16 |         expect(result.tools).toEqual(['apify/instagram-scraper', 'lukaskrivka/google-maps']);
17 |         expect(result.actors).toBeUndefined();
18 |     });
19 | 
20 |     it('should handle URL without query params', () => {
21 |         const url = 'https://mcp.apify.com';
22 |         const result = parseInputParamsFromUrl(url);
23 |         expect(result.actors).toBeUndefined();
24 |     });
25 | 
26 |     it('should parse enableActorAutoLoading flag', () => {
27 |         const url = 'https://mcp.apify.com?enableActorAutoLoading=true';
28 |         const result = parseInputParamsFromUrl(url);
29 |         expect(result.enableAddingActors).toBe(true);
30 |     });
31 | 
32 |     it('should parse enableAddingActors flag', () => {
33 |         const url = 'https://mcp.apify.com?enableAddingActors=true';
34 |         const result = parseInputParamsFromUrl(url);
35 |         expect(result.enableAddingActors).toBe(true);
36 |     });
37 | 
38 |     it('should parse enableAddingActors flag', () => {
39 |         const url = 'https://mcp.apify.com?enableAddingActors=false';
40 |         const result = parseInputParamsFromUrl(url);
41 |         expect(result.enableAddingActors).toBe(false);
42 |     });
43 | 
44 |     it('should handle Actors as string parameter (as tools)', () => {
45 |         const url = 'https://mcp.apify.com?actors=apify/rag-web-browser';
46 |         const result = parseInputParamsFromUrl(url);
47 |         expect(result.tools).toEqual(['apify/rag-web-browser']);
48 |         expect(result.actors).toBeUndefined();
49 |     });
50 | });
51 | 


--------------------------------------------------------------------------------
/src/utils/ttl-lru.ts:
--------------------------------------------------------------------------------
 1 | import { LruCache } from '@apify/datastructures';
 2 | 
 3 | /**
 4 |  * LRU cache with TTL (time-to-live) for storing entries.
 5 |  *
 6 |  * This class wraps an LRU cache and adds a time-to-live (TTL) expiration to each entry.
 7 |  * When an entry is accessed, it is checked for expiration and removed if expired.
 8 |  *
 9 |  * Usage:
10 |  *   ```typescript
11 |  *   const cache = new TTLLRUCache<string>(100, 60); // 100 items, 60 seconds TTL
12 |  *   cache.set('key', 'value');
13 |  *   const value = cache.get('key');
14 |  *   ```
15 |  */
16 | export class TTLLRUCache<T> {
17 |     // Internal LRU cache storing value and expiration timestamp for each entry
18 |     private readonly cache: LruCache<{
19 |         value: T;
20 |         expiresAt: number;
21 |     }>;
22 | 
23 |     // Time-to-live in milliseconds for each entry
24 |     private readonly ttlMillis: number;
25 | 
26 |     /**
27 |      * @param maxLength Maximum number of items in the cache (LRU eviction)
28 |      * @param ttlSecs Time-to-live for each entry, in seconds
29 |      */
30 |     constructor(maxLength: number, ttlSecs: number) {
31 |         this.ttlMillis = ttlSecs * 1000;
32 |         this.cache = new LruCache<{
33 |             value: T;
34 |             expiresAt: number;
35 |         }>({
36 |             maxLength,
37 |         });
38 |     }
39 | 
40 |     /**
41 |      * Set a value in the cache with the given key. If the key exists, it is updated and TTL is reset.
42 |      * @param key Cache key
43 |      * @param value Value to store
44 |      */
45 |     set(key: string, value: T) {
46 |         // If the key already exists, remove it to update the value and reset TTL
47 |         if (this.cache.get(key)) {
48 |             this.cache.remove(key);
49 |         }
50 |         this.cache.add(key, {
51 |             value,
52 |             expiresAt: Date.now() + this.ttlMillis,
53 |         });
54 |     }
55 | 
56 |     /**
57 |      * Get a value from the cache by key. Returns null if not found or expired.
58 |      * @param key Cache key
59 |      * @returns The value if present and not expired, otherwise null
60 |      */
61 |     get(key: string): T | null {
62 |         const entry = this.cache.get(key);
63 |         if (entry && entry.expiresAt > Date.now()) {
64 |             return entry.value;
65 |         }
66 |         this.cache.remove(key); // Remove expired entry
67 |         return null;
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/tests/unit/utils.progress.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it, vi } from 'vitest';
 2 | 
 3 | import { ProgressTracker } from '../../src/utils/progress.js';
 4 | 
 5 | describe('ProgressTracker', () => {
 6 |     it('should send progress notifications correctly', async () => {
 7 |         const mockSendNotification = vi.fn();
 8 |         const progressToken = 'test-token-123';
 9 |         const tracker = new ProgressTracker(progressToken, mockSendNotification);
10 | 
11 |         await tracker.updateProgress('Quarter done');
12 | 
13 |         expect(mockSendNotification).toHaveBeenCalledWith({
14 |             method: 'notifications/progress',
15 |             params: {
16 |                 progressToken,
17 |                 progress: 1,
18 |                 message: 'Quarter done',
19 |             },
20 |         });
21 |     });
22 | 
23 |     it('should track actor run status updates', async () => {
24 |         const mockSendNotification = vi.fn();
25 |         const tracker = new ProgressTracker('test-token', mockSendNotification);
26 | 
27 |         // Test with a simple manual update instead of mocking the full actor run flow
28 |         await tracker.updateProgress('test-actor: READY');
29 |         await tracker.updateProgress('test-actor: RUNNING');
30 |         await tracker.updateProgress('test-actor: SUCCEEDED');
31 | 
32 |         expect(mockSendNotification).toHaveBeenCalledTimes(3);
33 |         expect(mockSendNotification).toHaveBeenNthCalledWith(1, {
34 |             method: 'notifications/progress',
35 |             params: {
36 |                 progressToken: 'test-token',
37 |                 progress: 1,
38 |                 message: 'test-actor: READY',
39 |             },
40 |         });
41 |         expect(mockSendNotification).toHaveBeenNthCalledWith(3, {
42 |             method: 'notifications/progress',
43 |             params: {
44 |                 progressToken: 'test-token',
45 |                 progress: 3,
46 |                 message: 'test-actor: SUCCEEDED',
47 |             },
48 |         });
49 |     });
50 | 
51 |     it('should handle notification send errors gracefully', async () => {
52 |         const mockSendNotification = vi.fn().mockRejectedValue(new Error('Network error'));
53 |         const tracker = new ProgressTracker('test-token', mockSendNotification);
54 | 
55 |         // Should not throw
56 |         await expect(tracker.updateProgress('Test')).resolves.toBeUndefined();
57 |         expect(mockSendNotification).toHaveBeenCalled();
58 |     });
59 | });
60 | 


--------------------------------------------------------------------------------
/src/main.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Serves as an Actor MCP SSE server entry point.
 3 |  * This file needs to be named `main.ts` to be recognized by the Apify platform.
 4 |  */
 5 | 
 6 | import { Actor } from 'apify';
 7 | import type { ActorCallOptions } from 'apify-client';
 8 | 
 9 | import log from '@apify/log';
10 | 
11 | import { createExpressApp } from './actor/server.js';
12 | import { ApifyClient } from './apify-client.js';
13 | import { processInput } from './input.js';
14 | import { callActorGetDataset } from './tools/index.js';
15 | import type { Input } from './types.js';
16 | 
// The platform reports 'STANDBY' as the run origin when the Actor is started as a standby web server
const STANDBY_MODE = Actor.getEnv().metaOrigin === 'STANDBY';

await Actor.init();

// On the platform, host and port come from the standby environment; locally, fall back to localhost:3001
const HOST = Actor.isAtHome() ? process.env.ACTOR_STANDBY_URL as string : 'http://localhost';
const PORT = Actor.isAtHome() ? Number(process.env.ACTOR_STANDBY_PORT) : 3001;

if (!process.env.APIFY_TOKEN) {
    log.error('APIFY_TOKEN is required but not set in the environment variables.');
    process.exit(1);
}

const input = processInput((await Actor.getInput<Partial<Input>>()) ?? ({} as Input));
log.info('Loaded input', { input: JSON.stringify(input) });

if (STANDBY_MODE) {
    // In standby mode, actors and tools are provided via URL query params per request
    // Start express app
    const app = createExpressApp(HOST);
    log.info('Actor is running in the STANDBY mode.');

    app.listen(PORT, () => {
        log.info('Actor web server listening', { host: HOST, port: PORT });
    });
} else {
    log.info('Actor is not designed to run in the NORMAL mode (use this mode only for debugging purposes)');

    // Both fields are needed by the debug call below, so fail when either one is missing
    // (previously only the "both missing" case was rejected).
    if (!input.debugActor || !input.debugActorInput) {
        await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input');
    }
    const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions;

    const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN });
    const callResult = await callActorGetDataset(input.debugActor!, input.debugActorInput!, apifyClient, options);

    // Store the preview items in the default dataset so the debug run has visible output
    if (callResult && callResult.previewItems.length > 0) {
        await Actor.pushData(callResult.previewItems);
        log.info('Pushed items to dataset', { itemCount: callResult.previewItems.length });
    }
    await Actor.exit();
}

// So Ctrl+C works locally
process.on('SIGINT', async () => {
    log.info('Received SIGINT, shutting down gracefully...');
    await Actor.exit();
});
64 | 


--------------------------------------------------------------------------------
/tests/unit/mcp.actors.test.ts:
--------------------------------------------------------------------------------
 1 | import type { ActorDefinition } from 'apify-client';
 2 | import { describe, expect, it } from 'vitest';
 3 | 
 4 | import { MCP_STREAMABLE_ENDPOINT } from '../../src/const.js';
 5 | import { getActorMCPServerPath } from '../../src/mcp/actors.js';
 6 | 
 7 | // Helper to create a valid ActorDefinition and allow webServerMcpPath for testing
 8 | function makeActorDefinitionWithPath(webServerMcpPath?: unknown): ActorDefinition {
 9 |     return {
10 |         actorSpecification: 0,
11 |         name: 'dummy',
12 |         version: '0.0',
13 |         ...(webServerMcpPath !== undefined ? { webServerMcpPath } : {}),
14 |     };
15 | }
16 | 
// Unit tests for getActorMCPServerPath: missing/invalid values, single path, streamable priority, trimming.
describe('getActorMCPServerPath', () => {
    // Runs the function under test against a definition carrying the given raw path value
    const resolvePath = (webServerMcpPath?: unknown) => getActorMCPServerPath(makeActorDefinitionWithPath(webServerMcpPath));

    it('should return null if webServerMcpPath is missing', () => {
        expect(resolvePath()).toBeNull();
    });

    it('should return null if webServerMcpPath is not a string', () => {
        expect(resolvePath(123)).toBeNull();
    });

    it('should return the single path if only one is present', () => {
        expect(resolvePath('/mcp')).toBe('/mcp');
    });

    it('should return the streamable path if present among multiple', () => {
        expect(resolvePath(`/foo, ${MCP_STREAMABLE_ENDPOINT}, /bar`)).toBe(MCP_STREAMABLE_ENDPOINT);
    });

    it('should return the first path if streamable is not present', () => {
        expect(resolvePath('/foo, /bar, /baz')).toBe('/foo');
    });

    it('should trim whitespace from paths', () => {
        expect(resolvePath('   /foo  ,   /bar  ')).toBe('/foo');
    });

    it('should handle streamable path with whitespace', () => {
        expect(resolvePath(` /foo ,   ${MCP_STREAMABLE_ENDPOINT}  , /bar `)).toBe(MCP_STREAMABLE_ENDPOINT);
    });
});
60 | 


--------------------------------------------------------------------------------
/src/tools/run_collection.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { HelperTools } from '../const.js';
 5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
 6 | import { compileSchema } from '../utils/ajv.js';
 7 | import { buildMCPResponse } from '../utils/mcp.js';
 8 | 
// Input arguments of the "get user runs list" tool. The same schema is used to generate the
// tool's JSON input schema and to parse the arguments inside the tool's `call` handler.
const getUserRunsListArgs = z.object({
    // Pagination offset; defaults to 0
    offset: z.number()
        .describe('Number of array elements that should be skipped at the start. The default value is 0.')
        .default(0),
    // Page size; capped at 10, which is also the default
    limit: z.number()
        .max(10)
        .describe('Maximum number of array elements to return. The default value (as well as the maximum) is 10.')
        .default(10),
    // Sort direction; defaults to false (ascending)
    desc: z.boolean()
        .describe('If true or 1 then the runs are sorted by the startedAt field in descending order. Default: sorted in ascending order.')
        .default(false),
    // Optional status filter, restricted to the listed run statuses
    status: z.enum(['READY', 'RUNNING', 'SUCCEEDED', 'FAILED', 'TIMING-OUT', 'TIMED-OUT', 'ABORTING', 'ABORTED'])
        .optional()
        .describe('Return only runs with the provided status.'),
});
24 | 
/**
 * Internal tool that lists the authenticated user's Actor runs.
 * API reference: https://docs.apify.com/api/v2/act-runs-get
 *
 * The handler parses the arguments with `getUserRunsListArgs`, calls `client.runs().list()`
 * with the caller's token and returns the raw result as a fenced JSON text block.
 */
export const getUserRunsList: ToolEntry = {
    type: 'internal',
    name: HelperTools.ACTOR_RUN_LIST_GET,
    description: `List Actor runs for the authenticated user with optional filtering and sorting.
The results will include run details (including datasetId and keyValueStoreId) and can be filtered by status.
Valid statuses: READY (not allocated), RUNNING (executing), SUCCEEDED (finished), FAILED (failed), TIMING-OUT, TIMED-OUT, ABORTING, ABORTED.

USAGE:
- Use when you need to browse or filter recent Actor runs.

USAGE EXAMPLES:
- user_input: List my last 10 runs (newest first)
- user_input: Show only SUCCEEDED runs`,
    inputSchema: z.toJSONSchema(getUserRunsListArgs) as ToolInputSchema,
    ajvValidate: compileSchema(z.toJSONSchema(getUserRunsListArgs)),
    annotations: {
        title: 'Get user runs list',
        readOnlyHint: true,
        openWorldHint: false,
    },
    call: async ({ args, apifyToken }: InternalToolArgs) => {
        // Parsing also applies the schema defaults (offset 0, limit 10, desc false)
        const { limit, offset, desc, status } = getUserRunsListArgs.parse(args);
        const client = new ApifyClient({ token: apifyToken });
        const runs = await client.runs().list({ limit, offset, desc, status });
        const serializedRuns = JSON.stringify(runs);
        return buildMCPResponse({
            texts: [`\`\`\`json\n${serializedRuns}\n\`\`\``],
        });
    },
} as const;
58 | 


--------------------------------------------------------------------------------
/src/mcp/actors.ts:
--------------------------------------------------------------------------------
 1 | import type { ActorDefinition } from 'apify-client';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { MCP_STREAMABLE_ENDPOINT } from '../const.js';
 5 | import type { ActorDefinitionPruned } from '../types.js';
 6 | import { parseCommaSeparatedList } from '../utils/generic.js';
 7 | 
 8 | /**
 9 |  * Returns the MCP server path for the given Actor ID.
10 |  * Prioritizes the streamable transport path if available.
11 |  * The `webServerMcpPath` is a string containing MCP endpoint or endpoints separated by commas.
12 |  */
13 | export function getActorMCPServerPath(actorDefinition: ActorDefinition | ActorDefinitionPruned): string | null {
14 |     if ('webServerMcpPath' in actorDefinition && typeof actorDefinition.webServerMcpPath === 'string') {
15 |         const webServerMcpPath = actorDefinition.webServerMcpPath.trim();
16 | 
17 |         const paths = parseCommaSeparatedList(webServerMcpPath);
18 |         // If there is only one path, return it directly
19 |         if (paths.length === 1) {
20 |             return paths[0];
21 |         }
22 | 
23 |         // If there are multiple paths, prioritize the streamable transport path
24 |         // otherwise return the first one.
25 |         const streamablePath = paths.find((path) => path === MCP_STREAMABLE_ENDPOINT);
26 |         if (streamablePath) {
27 |             return streamablePath;
28 |         }
29 |         // Otherwise, return the first path
30 |         return paths[0];
31 |     }
32 | 
33 |     return null;
34 | }
35 | 
/**
 * Builds the full MCP server URL of an Actor: its standby URL followed by the MCP server path.
 *
 * @param realActorId - Actor ID used as the standby subdomain
 * @param mcpServerPath - Path appended to the standby URL as-is
 */
export async function getActorMCPServerURL(realActorId: string, mcpServerPath: string): Promise<string> {
    // TODO: get from API instead
    const runsOnSecurityHost = process.env.HOSTNAME === 'mcp-securitybyobscurity.apify.com';
    const standbyBaseUrl = runsOnSecurityHost ? 'securitybyobscurity.apify.actor' : 'apify.actor';
    const standbyUrl = await getActorStandbyURL(realActorId, standbyBaseUrl);
    return `${standbyUrl}${mcpServerPath}`;
}
46 | 
/**
 * Resolves an Actor ID or name to the Actor's real ID by fetching the Actor via the API.
 *
 * @param actorIdOrName - Actor ID or name to look up
 * @param apifyToken - Token used for the lookup
 * @returns The `id` of the fetched Actor
 * @throws Error when the Actor is not found
 */
export async function getRealActorID(actorIdOrName: string, apifyToken: string): Promise<string> {
    const client = new ApifyClient({ token: apifyToken });
    const actorInfo = await client.actor(actorIdOrName).get();
    if (actorInfo) {
        return actorInfo.id;
    }
    throw new Error(`Actor ${actorIdOrName} not found`);
}
60 | 
/**
 * Builds the standby URL of an Actor: `https://<actor id>.<standby base domain>`.
 *
 * @param realActorId - Actor ID used as the subdomain
 * @param standbyBaseUrl - Base domain, 'apify.actor' by default
 */
export async function getActorStandbyURL(realActorId: string, standbyBaseUrl = 'apify.actor'): Promise<string> {
    const host = `${realActorId}.${standbyBaseUrl}`;
    return `https://${host}`;
}
67 | 


--------------------------------------------------------------------------------
/src/apify-client.ts:
--------------------------------------------------------------------------------
 1 | import type { ApifyClientOptions } from 'apify';
 2 | import { ApifyClient as _ApifyClient } from 'apify-client';
 3 | import type { AxiosRequestConfig } from 'axios';
 4 | 
 5 | import { USER_AGENT_ORIGIN } from './const.js';
 6 | 
// Options accepted by the ApifyClient wrapper below: the base client options with a relaxed `token`
// (may also be null) plus an optional Skyfire pay ID.
type ExtendedApifyClientOptions = Omit<ApifyClientOptions, 'token'> & {
    // A null token is removed by the wrapper's constructor before calling the base client
    token?: string | null | undefined;
    // When set, sent as the `skyfire-pay-id` header on every request
    skyfirePayId?: string;
};
11 | 
/**
 * Request interceptor that appends this server's origin marker to the User-Agent header.
 *
 * An existing User-Agent is kept and the marker is appended after "; ". When no User-Agent
 * is set yet, the marker is used on its own (no leading separator).
 *
 * @param config - Request config to extend; the config object itself is shallow-copied
 * @returns The copied config with the updated User-Agent header
 * @private
 */
function addUserAgent(config: AxiosRequestConfig): AxiosRequestConfig {
    const updatedConfig = { ...config };
    updatedConfig.headers = updatedConfig.headers ?? {};
    const currentUserAgent = updatedConfig.headers['User-Agent'];
    updatedConfig.headers['User-Agent'] = currentUserAgent
        ? `${currentUserAgent}; ${USER_AGENT_ORIGIN}`
        : USER_AGENT_ORIGIN;
    return updatedConfig;
}
23 | 
/**
 * Returns the Apify API base URL to use.
 *
 * `APIFY_API_BASE_URL` is honoured only when `APIFY_IS_AT_HOME` is not set; in every other case
 * the public API URL is returned.
 */
export function getApifyAPIBaseUrl(): string {
    const publicApiUrl = 'https://api.apify.com';
    const { APIFY_IS_AT_HOME: isAtHome, APIFY_API_BASE_URL: configuredUrl } = process.env;
    // Workaround for Actor server where the platform APIFY_API_BASE_URL did not work with getActorDefinition from actors.ts
    if (isAtHome || !configuredUrl) {
        return publicApiUrl;
    }
    return configuredUrl;
}
29 | 
/**
 * Apify API client preconfigured for this server: it always uses the base URL from
 * `getApifyAPIBaseUrl()`, tags requests with the server's User-Agent origin and can
 * attach a `skyfire-pay-id` header.
 */
export class ApifyClient extends _ApifyClient {
    /**
     * @param options - Base client options plus `skyfirePayId`; `token` may be null.
     *                  The passed object is not modified.
     */
    constructor(options: ExtendedApifyClientOptions) {
        const { skyfirePayId, token, ...clientOptions } = options;

        /**
         * In order to publish to DockerHub, we need to run their build task to validate our MCP server.
         * This was failing since we were sending this dummy token to Apify in order to build the Actor tools.
         * So if we encounter this dummy value (or null), we leave the token out to use Apify client as
         * unauthenticated, which is sufficient for server start and listing of tools.
         */
        const isUnusableToken = token === null || token?.toLowerCase() === 'your-apify-token';
        const effectiveToken = isUnusableToken ? undefined : token;

        const requestInterceptors = [addUserAgent];
        /**
         * Add skyfire-pay-id header if provided.
         */
        if (skyfirePayId) {
            requestInterceptors.push((config) => {
                const updatedConfig = { ...config };
                updatedConfig.headers = updatedConfig.headers ?? {};
                updatedConfig.headers['skyfire-pay-id'] = skyfirePayId;
                return updatedConfig;
            });
        }

        super({
            ...clientOptions as ApifyClientOptions,
            // Pass the token only when there is a usable one
            ...(effectiveToken === undefined ? {} : { token: effectiveToken }),
            baseUrl: getApifyAPIBaseUrl(),
            requestInterceptors,
        });
    }
}
65 | 


--------------------------------------------------------------------------------
/manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "manifest_version": "0.2",
 3 |   "name": "apify-mcp-server",
 4 |   "display_name": "Apify MCP server",
 5 |   "version": "0.6.5",
 6 |   "description": "Extract data from any website using thousands of tools from the Apify Store.",
 7 |   "long_description": "Apify is the world's largest marketplace of tools for web scraping, data extraction, and web automation. You can extract structured data from social media, e-commerce, search engines, maps, travel sites, or any other website.",
 8 |   "keywords": [
 9 |     "apify",
10 |     "actors",
11 |     "dataset",
12 |     "mcp",
13 |     "automation",
14 |     "web",
15 |     "web scraping",
16 |     "web automation",
17 |     "web scraper",
18 |     "web crawler",
19 |     "scraping",
20 |     "data extraction",
21 |     "API"
22 |   ],
23 |   "author": {
24 |     "name": "Apify Technologies s.r.o.",
25 |     "url": "https://github.com/apify/apify-mcp-server"
26 |   },
27 |   "license": "MIT",
28 |   "privacy_policies": [
29 |     "https://docs.apify.com/legal/privacy-policy",
30 |     "https://docs.apify.com/legal/gdpr-information",
31 |     "https://docs.apify.com/legal"
32 |   ],
33 |   "repository": {
34 |     "type": "git",
35 |     "url": "https://github.com/apify/apify-mcp-server"
36 |   },
37 |   "homepage": "https://mcp.apify.com",
38 |   "support": "https://github.com/apify/apify-mcp-server/issues",
39 |   "icon": "docs/apify-logo.png",
40 |   "screenshots": [
41 |     "docs/actors-mcp-server.png"
42 |   ],
43 |   "server": {
44 |     "type": "node",
45 |     "entry_point": "dist/stdio.js",
46 |     "mcp_config": {
47 |       "command": "node",
48 |       "args": [
49 |         "${__dirname}/dist/stdio.js",
50 |         "--tools",
51 |         "${user_config.tools}"
52 |       ],
53 |       "env": {
54 |         "APIFY_TOKEN": "${user_config.apify_token}"
55 |       }
56 |     }
57 |   },
58 |   "tools_generated": true,
59 |   "prompts_generated": true,
60 |   "user_config": {
61 |     "apify_token": {
62 |       "type": "string",
63 |       "title": "Apify token",
64 |       "description": "Your Apify API token for authentication",
65 |       "sensitive": true,
66 |       "required": true
67 |     },
68 |     "tools": {
69 |       "type": "string",
70 |       "title": "Enabled tools",
71 |       "description": "Comma-separated list of tools to enable. Can be either a tool category, a specific tool, or an Apify Actor. For example: \"actors,docs,apify/rag-web-browser\". For more details visit https://mcp.apify.com.",
72 |       "required": false,
73 |       "default": "actors,docs,apify/rag-web-browser"
74 |     }
75 |   },
76 |   "compatibility": {
77 |     "claude_desktop": ">=0.2.16",
78 |     "platforms": [
79 |       "darwin",
80 |       "win32",
81 |       "linux"
82 |     ],
83 |     "runtimes": {
84 |       "node": ">=20.0.0"
85 |     }
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/src/tools/key_value_store_collection.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { HelperTools } from '../const.js';
 5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
 6 | import { compileSchema } from '../utils/ajv.js';
 7 | 
 8 | const getUserKeyValueStoresListArgs = z.object({ // Zod input schema of the key-value store listing tool
 9 |     offset: z.number()
10 |         .describe('Number of array elements that should be skipped at the start. The default is 0.')
11 |         .default(0),
12 |     limit: z.number()
13 |         .max(10) // the cap equals the default below
14 |         .describe('Maximum number of array elements to return. The default value (and maximum) is 10.')
15 |         .default(10),
16 |     desc: z.boolean()
17 |         .describe('If true or 1 then the stores are sorted by the createdAt field in descending order. Default: sorted in ascending order.')
18 |         .default(false),
19 |     unnamed: z.boolean()
20 |         .describe('If true or 1 then all the stores are returned. By default, only named key-value stores are returned.')
21 |         .default(false),
22 | });
23 | 
24 | /**
25 |  * Internal tool listing the authenticated user's key-value stores, see https://docs.apify.com/api/v2/key-value-stores-get
26 |  */
27 | export const getUserKeyValueStoresList: ToolEntry = {
28 |     type: 'internal',
29 |     name: HelperTools.KEY_VALUE_STORE_LIST_GET,
30 |     description: `List key-value stores owned by the authenticated user.
31 | Actor runs automatically produce unnamed stores (set unnamed=true to include them). Users can also create named stores.
32 | 
33 | The results will include basic info for each store, sorted by createdAt (ascending by default).
34 | Use limit, offset, and desc to paginate and sort.
35 | 
36 | USAGE:
37 | - Use when you need to browse available key-value stores (named or unnamed).
38 | 
39 | USAGE EXAMPLES:
40 | - user_input: List my last 10 key-value stores (newest first)
41 | - user_input: List unnamed key-value stores`,
42 |     inputSchema: z.toJSONSchema(getUserKeyValueStoresListArgs) as ToolInputSchema,
43 |     ajvValidate: compileSchema(z.toJSONSchema(getUserKeyValueStoresListArgs)),
44 |     annotations: {
45 |         title: 'Get user key-value stores list',
46 |         readOnlyHint: true,
47 |         openWorldHint: false,
48 |     },
49 |     call: async ({ args, apifyToken }: InternalToolArgs) => {
50 |         // Parsing with the zod schema fills in the declared defaults for omitted arguments.
51 |         const { limit, offset, desc, unnamed } = getUserKeyValueStoresListArgs.parse(args);
52 |         const storesClient = new ApifyClient({ token: apifyToken }).keyValueStores();
53 |         const stores = await storesClient.list({ limit, offset, desc, unnamed });
54 |         const serializedStores = JSON.stringify(stores);
55 | 
56 |         // The listing is returned as one fenced JSON text block.
57 |         return {
58 |             content: [{ type: 'text', text: `\`\`\`json\n${serializedStores}\n\`\`\`` }],
59 |         };
60 |     },
61 | } as const;
62 | 


--------------------------------------------------------------------------------
/src/tools/dataset_collection.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { HelperTools } from '../const.js';
 5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
 6 | import { compileSchema } from '../utils/ajv.js';
 7 | 
 8 | const getUserDatasetsListArgs = z.object({ // Zod input schema of the dataset listing tool
 9 |     offset: z.number()
10 |         .describe('Number of array elements that should be skipped at the start. The default value is 0.')
11 |         .default(0),
12 |     limit: z.number()
13 |         .max(20) // cap; note that the default below is lower than the cap
14 |         .describe('Maximum number of array elements to return. The default value is 10 and the maximum is 20.')
15 |         .default(10),
16 |     desc: z.boolean()
17 |         .describe('If true or 1 then the datasets are sorted by the createdAt field in descending order. Default: sorted in ascending order.')
18 |         .default(false),
19 |     unnamed: z.boolean()
20 |         .describe('If true or 1 then all the datasets are returned. By default only named datasets are returned.')
21 |         .default(false),
22 | });
23 | 
24 | /**
25 |  * Internal tool listing the authenticated user's datasets, see https://docs.apify.com/api/v2/datasets-get
26 |  */
27 | export const getUserDatasetsList: ToolEntry = {
28 |     type: 'internal',
29 |     name: HelperTools.DATASET_LIST_GET,
30 |     description: `List datasets (collections of Actor run data) for the authenticated user.
31 | Actor runs automatically produce unnamed datasets (set unnamed=true to include them). Users can also create named datasets.
32 | 
33 | The results will include datasets with itemCount, access settings, and usage stats, sorted by createdAt (ascending by default).
34 | Use limit (max 20), offset, and desc to paginate and sort.
35 | 
36 | USAGE:
37 | - Use when you need to browse available datasets (named or unnamed) to locate data.
38 | 
39 | USAGE EXAMPLES:
40 | - user_input: List my last 10 datasets (newest first)
41 | - user_input: List unnamed datasets`,
42 |     inputSchema: z.toJSONSchema(getUserDatasetsListArgs) as ToolInputSchema,
43 |     ajvValidate: compileSchema(z.toJSONSchema(getUserDatasetsListArgs)),
44 |     annotations: {
45 |         title: 'Get user datasets list',
46 |         readOnlyHint: true,
47 |         openWorldHint: false,
48 |     },
49 |     call: async ({ args, apifyToken }: InternalToolArgs) => {
50 |         // Parsing with the zod schema fills in the declared defaults for omitted arguments.
51 |         const { limit, offset, desc, unnamed } = getUserDatasetsListArgs.parse(args);
52 |         const datasetsClient = new ApifyClient({ token: apifyToken }).datasets();
53 |         const datasets = await datasetsClient.list({ limit, offset, desc, unnamed });
54 |         const serializedDatasets = JSON.stringify(datasets);
55 | 
56 |         // The listing is returned as one fenced JSON text block.
57 |         return {
58 |             content: [{ type: 'text', text: `\`\`\`json\n${serializedDatasets}\n\`\`\`` }],
59 |         };
60 |     },
61 | } as const;
62 | 


--------------------------------------------------------------------------------
/tests/unit/schema-generation.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest';
 2 | 
 3 | import { generateSchemaFromItems } from '../../src/utils/schema-generation.js';
 4 | 
 5 | describe('generateSchemaFromItems', () => { // Checks the schema that generateSchemaFromItems derives from sample items
 6 |     it('should generate basic schema from simple objects', () => {
 7 |         const items = [{ name: 'John', age: 30 }];
 8 |         const result = generateSchemaFromItems(items);
 9 |         expect(result).toBeDefined();
10 |         expect(result?.type).toBe('array');
11 |         expect(result?.items).toBeDefined();
12 |         const props = result?.items.properties;
13 |         expect(props).toBeDefined();
14 |         if (props) {
15 |             expect(props.name?.type).toBe('string');
16 |             expect(props.age?.type).toBe('integer'); // whole numbers are expected to be typed 'integer'
17 |         }
18 |     });
19 | 
20 |     it('should handle different data types', () => {
21 |         const items = [
22 |             { string: 'test', number: 42, boolean: true, object: { nested: 'value' }, array: [1, 2, 3] },
23 |         ];
24 |         const result = generateSchemaFromItems(items);
25 |         expect(result).toBeDefined();
26 |         expect(result?.type).toBe('array');
27 |         if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { // NOTE(review): when this guard is false, every assertion below is silently skipped
28 |             const props = result.items.properties;
29 |             expect(props).toBeDefined();
30 |             if (props) {
31 |                 expect(props.string?.type).toBe('string');
32 |                 expect(props.number?.type).toBe('integer');
33 |                 expect(props.boolean?.type).toBe('boolean');
34 |                 expect(props.object?.type).toBe('object');
35 |                 expect(props.array?.type).toBe('array');
36 |                 expect(props.object?.properties?.nested?.type).toBe('string'); // nested object fields get their own schema
37 |                 expect(props.array?.items?.type).toBe('integer'); // array element type is derived from the elements
38 |             }
39 |         }
40 |     });
41 | 
42 |     it('should respect the limit option', () => {
43 |         const items = [
44 |             { id: 1, name: 'A' },
45 |             { id: 2, name: 'B' },
46 |             { id: 3, name: 'C' },
47 |             { id: 4, extra: 'D' }, // first item past `limit: 3`; only these last two carry `extra`
48 |             { id: 5, extra: 'E' },
49 |         ];
50 |         const result = generateSchemaFromItems(items, { limit: 3 });
51 |         expect(result).toBeDefined();
52 |         expect(result?.type).toBe('array');
53 |         if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { // NOTE(review): when this guard is false, every assertion below is silently skipped
54 |             const props = result.items.properties;
55 |             expect(props).toBeDefined();
56 |             if (props) {
57 |                 expect(props.id).toBeDefined();
58 |                 expect(props.name).toBeDefined();
59 |                 expect(props.extra).toBeUndefined(); // Should not include fields from items beyond limit
60 |             }
61 |         }
62 |     });
63 | });
64 | 


--------------------------------------------------------------------------------
/src/mcp/utils.ts:
--------------------------------------------------------------------------------
 1 | import { createHash } from 'node:crypto';
 2 | import { parse } from 'node:querystring';
 3 | 
 4 | import type { TaskStore } from '@modelcontextprotocol/sdk/experimental/tasks/interfaces.js';
 5 | import type { ApifyClient } from 'apify-client';
 6 | 
 7 | import { processInput } from '../input.js';
 8 | import type { Input } from '../types.js';
 9 | import { loadToolsFromInput } from '../utils/tools-loader.js';
10 | import { MAX_TOOL_NAME_LENGTH, SERVER_ID_LENGTH } from './const.js';
11 | 
12 | /**
13 |  * Derives a deterministic server identifier from a server URL.
14 |  *
15 |  * The URL is hashed rather than the Actor ID because one Actor may expose multiple servers - legacy SSE / streamable HTTP.
16 |  *
17 |  * @param url The server URL to derive the identifier from.
18 |  * @returns The first SERVER_ID_LENGTH characters of the URL's hex-encoded SHA-256 digest.
19 |  */
20 | export function getMCPServerID(url: string): string {
21 |     const urlHasher = createHash('sha256');
22 |     urlHasher.update(url);
23 |     return urlHasher.digest('hex').slice(0, SERVER_ID_LENGTH);
24 | }
25 | 
26 | /**
27 |  * Builds the name under which a tool of a proxied MCP server is exposed.
28 |  * @param url The server URL; its getMCPServerID() value becomes the name prefix.
29 |  * @param toolName The tool's own name.
30 |  * @returns `<serverId>-<toolName>`, cut off after MAX_TOOL_NAME_LENGTH characters.
31 |  */
32 | export function getProxyMCPServerToolName(url: string, toolName: string): string {
33 |     const nameParts = [getMCPServerID(url), toolName];
34 | 
35 |     // Overlong names simply lose their tail.
36 |     return nameParts.join('-').slice(0, MAX_TOOL_NAME_LENGTH);
37 | }
38 | 
39 | /**
40 |  * Loads the tools selected by the query parameters of a request URL.
41 |  * The query string is converted to an Input by parseInputParamsFromUrl() and handed to loadToolsFromInput().
42 |  * @param url The URL to process
43 |  * @param apifyClient The Apify client instance
44 |  * @returns Whatever loadToolsFromInput() resolves to for that input.
45 |  */
46 | export async function processParamsGetTools(url: string, apifyClient: ApifyClient) {
47 |     return await loadToolsFromInput(parseInputParamsFromUrl(url), apifyClient);
48 | }
49 | 
50 | /** Parses everything after the first `?` of `url` (later `?` characters included) as a query string and normalizes it via processInput(). */
51 | export function parseInputParamsFromUrl(url: string): Input {
52 |     const query = url.includes('?') ? url.slice(url.indexOf('?') + 1) : '';
53 |     return processInput(parse(query) as unknown as Input);
54 | }
55 | 
56 | /**
57 |  * Reports whether the stored task is in the 'cancelled' state.
58 |  * Used to avoid SDK errors from moving a task out of a terminal state ('cancelled' -> 'working').
59 |  * @param taskId - The task identifier
60 |  * @param mcpSessionId - The MCP session ID
61 |  * @param taskStore - The task store instance
62 |  * @returns true only when the task is found and its status is 'cancelled'
63 |  */
64 | export async function isTaskCancelled(
65 |     taskId: string,
66 |     mcpSessionId: string | undefined,
67 |     taskStore: TaskStore,
68 | ): Promise<boolean> {
69 |     const storedTask = await taskStore.getTask(taskId, mcpSessionId);
70 |     return storedTask ? storedTask.status === 'cancelled' : false;
71 | }
72 | 


--------------------------------------------------------------------------------
/evals/eval-single.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env tsx
 2 | 
 3 | import dotenv from 'dotenv';
 4 | import log from '@apify/log';
 5 | import {
 6 |     loadTools,
 7 |     createOpenRouterTask,
 8 |     createToolSelectionLLMEvaluator,
 9 |     loadTestCases, filterById,
10 |     type TestCase
11 | } from './evaluation-utils.js';
12 | import { PASS_THRESHOLD, sanitizeHeaderValue } from './config.js';
13 | 
14 | dotenv.config({ path: '.env' });
15 | log.setLevel(log.LEVELS.INFO);
16 | 
17 | // const MODEL_NAME = 'openai/gpt-4.1-mini';
18 | const MODEL_NAME = 'anthropic/claude-haiku-4.5'
19 | const RUN_LLM_JUDGE = true;
20 | 
21 | // Hardcoded examples for quick testing
22 | const EXAMPLES: TestCase[] = [
23 | ];
24 | 
25 | EXAMPLES.push(...filterById(loadTestCases('test-cases.json').testCases, 'weather-mcp-search-then-call-1'));
26 | 
27 | /** Runs every entry of EXAMPLES through the model and, when RUN_LLM_JUDGE is on, grades it with the LLM judge. */
28 | async function main() {
29 |     process.env.OPENROUTER_API_KEY = sanitizeHeaderValue(process.env.OPENROUTER_API_KEY);
30 |     console.log(`\nEvaluating ${EXAMPLES.length} examples\n`);
31 | 
32 |     // Step 1: load the tool definitions offered to the model
33 |     const tools = await loadTools();
34 |     console.log(`Loaded ${tools.length} tools\n`);
35 | 
36 |     for (const [index, example] of EXAMPLES.entries()) {
37 |         // The task and the evaluator both take the test case as a plain record
38 |         const exampleRecord = example as unknown as Record<string, unknown>;
39 | 
40 |         console.log(`\n=== Example ${index + 1}/${EXAMPLES.length}: ${example.id} ===`);
41 |         console.log('Query:', example.query);
42 |         console.log('Expected tools:', example.expectedTools);
43 | 
44 |         // Step 2: let the model choose tools for the query
45 |         console.log('\nRunning LLM tool calling');
46 |         const task = createOpenRouterTask(MODEL_NAME, tools);
47 |         const output = await task({ input: exampleRecord });
48 | 
49 |         console.log('\nLLM response');
50 |         console.log('Tool calls:', JSON.stringify(output.tool_calls, null, 2));
51 |         console.log('Message:', output.llm_response || '(no message)');
52 | 
53 |         if (!RUN_LLM_JUDGE) {
54 |             console.log('Skipping LLM evaluation as RUN_LLM_JUDGE is set to false');
55 |             console.log('='.repeat(50));
56 |             continue;
57 |         }
58 | 
59 |         // Step 3: have the LLM judge grade the tool selection
60 |         console.log('\nEvaluating with LLM');
61 |         const judge = createToolSelectionLLMEvaluator(tools);
62 |         const verdict = await judge.evaluate({
63 |             input: exampleRecord,
64 |             output,
65 |             expected: exampleRecord,
66 |         });
67 |         // A missing or zero score counts as a failure
68 |         const passed = verdict.score ? verdict.score > PASS_THRESHOLD : false;
69 |         console.log('\nEvaluation result');
70 |         console.log('Score:', verdict.score );
71 |         console.log('Explanation:', verdict.explanation);
72 |         console.log('Passed:', passed ? 'True ✅' : 'False ❌');
73 |         console.log('='.repeat(50));
74 |     }
75 | }
75 | // Log a failed run and signal it to the caller through a non-zero exit status.
76 | main().catch((error: unknown) => { console.error(error); process.exitCode = 1; });
77 | 


--------------------------------------------------------------------------------
/src/utils/actor-response.ts:
--------------------------------------------------------------------------------
 1 | import type { CallActorGetDatasetResult } from '../tools/actor.js';
 2 | 
 3 | /**
 4 |  * Assembles the text content returned to the LLM after an Actor tool call.
 5 |  *
 6 |  * Two text parts are produced, in this order:
 7 |  * 1. A preview of the Actor output items (or a note that no preview is available).
 8 |  * 2. Run metadata, the output schema, and instructions for the LLM.
 9 |  * The metadata and instructions go last because the LLM may ignore them anywhere else.
10 |  *
11 |  * When the preview size differs from the total item count, the text says so explicitly;
12 |  * otherwise the LLM may assume it has all data and hallucinate missing items.
13 |  *
14 |  * @param actorName - The name of the actor.
15 |  * @param result - The result from callActorGetDataset.
16 |  * @returns The content array for the tool response.
17 |  */
18 | export function buildActorResponseContent(
19 |     actorName: string,
20 |     result: CallActorGetDatasetResult,
21 | ): ({ type: 'text'; text: string })[] {
22 |     const { runId, datasetId, itemCount, schema, previewItems } = result;
23 |     const previewCount = previewItems.length;
24 | 
25 |     // For an array schema, show the schema of a single item rather than the array wrapper
26 |     const displaySchema = (schema && schema.type === 'array' && typeof schema.items === 'object' && schema.items !== null) ? schema.items : schema;
27 | 
28 |     // Only present when the preview does not match the total item count
29 |     const limitedPreviewNotice = itemCount !== previewCount
30 |         ? ` You have access only to a limited preview of the Actor output. Do not present this as the full output, as you have only ${previewCount} item(s) available instead of the full ${itemCount} item(s). Be aware of this and inform users about the currently loaded count and the total available output items count.`
31 |         : '';
32 | 
33 |     const textContent = `Actor "${actorName}" completed successfully!
34 | 
35 | Results summary:
36 | • Run ID: ${runId}
37 | • Dataset ID: ${datasetId}
38 | • Total items: ${itemCount}
39 | 
40 | Actor output data schema:
41 | * You can use this schema to understand the structure of the output data and, for example, retrieve specific fields based on your current task.
42 | \`\`\`json
43 | ${JSON.stringify(displaySchema)}
44 | \`\`\`
45 | 
46 | Above this text block is a preview of the Actor output containing ${previewCount} item(s).${limitedPreviewNotice}
47 | 
48 | If you need to retrieve additional data, use the "get-actor-output" tool with: datasetId: "${datasetId}". Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be, and they might exceed the output limits.
49 | `;
50 | 
51 |     // Fall back to an explanatory note when there is nothing to preview
52 |     const itemsPreviewText = previewCount > 0
53 |         ? JSON.stringify(previewItems)
54 |         : `No items available for preview—either the Actor did not return any items or they are too large for preview. In this case, use the "get-actor-output" tool.`;
55 | 
56 |     return [
57 |         { type: 'text', text: itemsPreviewText },
58 |         // The metadata and instructions text must be at the end otherwise the LLM does not acknowledge it.
59 |         { type: 'text', text: textContent },
60 |     ];
61 | }
62 | 


--------------------------------------------------------------------------------
/src/telemetry.ts:
--------------------------------------------------------------------------------
 1 | import * as crypto from 'node:crypto';
 2 | 
 3 | import { Analytics } from '@segment/analytics-node';
 4 | 
 5 | import log from '@apify/log';
 6 | 
 7 | import {
 8 |     DEFAULT_TELEMETRY_ENV,
 9 |     SEGMENT_FLUSH_AT_EVENTS,
10 |     SEGMENT_FLUSH_INTERVAL_MS,
11 |     TELEMETRY_ENV,
12 | } from './const.js';
13 | import type { TelemetryEnv, ToolCallTelemetryProperties } from './types.js';
14 | 
15 | const DEV_WRITE_KEY = '9rPHlMtxX8FJhilGEwkfUoZ0uzWxnzcT';
16 | const PROD_WRITE_KEY = 'cOkp5EIJaN69gYaN8bcp7KtaD0fGABwJ';
17 | 
18 | // Event names following apify-core naming convention (Title Case)
19 | const SEGMENT_EVENTS = {
20 |     TOOL_CALL: 'MCP Tool Call',
21 | } as const;
22 | 
23 | /**
24 |  * Resolves a raw environment name (case-insensitive) to a TelemetryEnv; a missing, empty, or unrecognized name yields DEFAULT_TELEMETRY_ENV.
25 |  */
26 | export function getTelemetryEnv(env?: string | null): TelemetryEnv {
27 |     const upperCased = env ? env.toUpperCase() : undefined;
28 |     switch (upperCased) {
29 |         case TELEMETRY_ENV.DEV:
30 |         case TELEMETRY_ENV.PROD:
31 |             return upperCased as TelemetryEnv;
32 |         default:
33 |             return DEFAULT_TELEMETRY_ENV;
34 |     }
35 | }
36 | 
37 | // Single Segment Analytics client, created lazily by getOrInitAnalyticsClient and then reused for every call
38 | let analyticsClient: Analytics | null = null;
39 | 
40 | /**
41 |  * Gets or initializes the Segment Analytics client.
42 |  * The write key is chosen from `telemetryEnv` only when the client is first created; later calls return the cached client.
43 |  * @param telemetryEnv - Selects the PROD write key when equal to TELEMETRY_ENV.PROD, otherwise the DEV write key
44 |  * @returns Analytics client instance or null if initialization failed
45 |  */
46 | export function getOrInitAnalyticsClient(telemetryEnv: TelemetryEnv): Analytics | null {
47 |     if (!analyticsClient) {
48 |         try {
49 |             const writeKey = telemetryEnv === TELEMETRY_ENV.PROD ? PROD_WRITE_KEY : DEV_WRITE_KEY;
50 |             analyticsClient = new Analytics({
51 |                 writeKey,
52 |                 flushAt: SEGMENT_FLUSH_AT_EVENTS,
53 |                 flushInterval: SEGMENT_FLUSH_INTERVAL_MS,
54 |             });
55 |         } catch (error) {
56 |             log.error('Segment initialization failed', { error });
57 |             return null;
58 |         }
59 |     }
60 |     return analyticsClient;
61 | }
62 | 
63 | /**
64 |  * Sends a tool call event (SEGMENT_EVENTS.TOOL_CALL) to Segment.
65 |  * Segment requires either userId OR anonymousId, but not both, so a known user is reported
66 |  * by userId and any other call under a freshly generated anonymousId.
67 |  *
68 |  * @param userId - Apify user ID (null if not available)
69 |  * @param telemetryEnv - Telemetry environment
70 |  * @param properties - Event properties for the tool call
71 |  */
72 | export function trackToolCall(
73 |     userId: string | null,
74 |     telemetryEnv: TelemetryEnv,
75 |     properties: ToolCallTelemetryProperties,
76 | ): void {
77 |     const segment = getOrInitAnalyticsClient(telemetryEnv);
78 |     // No client (initialization failed): nothing to send
79 |     if (!segment) return;
80 | 
81 |     try {
82 |         const identity = userId ? { userId } : { anonymousId: crypto.randomUUID() };
83 |         segment.track({ ...identity, event: SEGMENT_EVENTS.TOOL_CALL, properties });
84 |     } catch (error) {
85 |         // Tracking failures are logged rather than propagated to the caller
86 |         log.error('Failed to track tool call event', { error, userId, toolName: properties.tool_name });
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/tests/unit/telemetry.test.ts:
--------------------------------------------------------------------------------
 1 | import { beforeEach, describe, expect, it, vi } from 'vitest';
 2 | 
 3 | import { trackToolCall } from '../../src/telemetry.js';
 4 | 
 5 | // Mock the Segment Analytics client
 6 | const mockTrack = vi.fn();
 7 | vi.mock('@segment/analytics-node', () => ({
 8 |     Analytics: vi.fn().mockImplementation(() => ({
 9 |         track: mockTrack,
10 |     })),
11 | }));
12 | 
13 | describe('telemetry', () => { // Verifies the payload trackToolCall hands to the mocked Segment client
14 |     beforeEach(() => {
15 |         vi.clearAllMocks(); // start each test without recorded mockTrack calls
16 |     });
17 | 
18 |     it('should send correct payload structure to Segment with userId', () => {
19 |         const userId = 'test-user-123';
20 |         const properties = {
21 |             app: 'mcp' as const,
22 |             app_version: '0.5.6',
23 |             mcp_client_name: 'test-client',
24 |             mcp_client_version: '1.0.0',
25 |             mcp_protocol_version: '2024-11-05',
26 |             mcp_client_capabilities: {},
27 |             mcp_session_id: 'session-123',
28 |             transport_type: 'stdio',
29 |             tool_name: 'test-tool',
30 |             tool_status: 'SUCCEEDED' as const,
31 |             tool_exec_time_ms: 100,
32 |         };
33 | 
34 |         trackToolCall(userId, 'DEV', properties);
35 | 
36 |         expect(mockTrack).toHaveBeenCalledWith({ // exact payload: userId only, no anonymousId key
37 |             userId: 'test-user-123',
38 |             event: 'MCP Tool Call',
39 |             properties: {
40 |                 app: 'mcp',
41 |                 app_version: '0.5.6',
42 |                 mcp_client_name: 'test-client',
43 |                 mcp_client_version: '1.0.0',
44 |                 mcp_protocol_version: '2024-11-05',
45 |                 mcp_client_capabilities: {},
46 |                 mcp_session_id: 'session-123',
47 |                 transport_type: 'stdio',
48 |                 tool_name: 'test-tool',
49 |                 tool_status: 'SUCCEEDED',
50 |                 tool_exec_time_ms: 100,
51 |             },
52 |         });
53 |     });
54 | 
55 |     it('should use anonymousId when userId is null', () => {
56 |         const properties = {
57 |             app: 'mcp' as const,
58 |             app_version: '0.5.6',
59 |             mcp_client_name: 'test-client',
60 |             mcp_client_version: '1.0.0',
61 |             mcp_protocol_version: '2024-11-05',
62 |             mcp_client_capabilities: {},
63 |             mcp_session_id: 'session-123',
64 |             transport_type: 'stdio',
65 |             tool_name: 'test-tool',
66 |             tool_status: 'SUCCEEDED' as const,
67 |             tool_exec_time_ms: 100,
68 |         };
69 | 
70 |         trackToolCall(null, 'DEV', properties); // no user ID available
71 | 
72 |         expect(mockTrack).toHaveBeenCalledTimes(1);
73 |         const callArgs = mockTrack.mock.calls[0][0]; // first argument of the only track() call
74 | 
75 |         // Should have anonymousId but not userId
76 |         expect(callArgs).toHaveProperty('anonymousId');
77 |         expect(callArgs.anonymousId).toBeDefined();
78 |         expect(typeof callArgs.anonymousId).toBe('string');
79 |         expect(callArgs.anonymousId.length).toBeGreaterThan(0);
80 |         expect(callArgs).not.toHaveProperty('userId');
81 |         expect(callArgs.event).toBe('MCP Tool Call');
82 |         expect(callArgs.properties).toEqual(properties);
83 |     });
84 | });
85 | 


--------------------------------------------------------------------------------
/.actor/input_schema.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "title": "Apify MCP Server",
 3 |     "type": "object",
 4 |     "schemaVersion": 1,
 5 |     "properties": {
 6 |         "actors": {
 7 |             "title": "Actors to be exposed for an AI application (AI agent)",
 8 |             "type": "array",
 9 |             "description": "List Actors to be exposed to an AI application (AI agent) for communication via the MCP protocol. \n\n Ensure the Actor definitions fit within the LLM context by limiting the number of used Actors.",
10 |             "editor": "stringList",
11 |             "prefill": [
12 |                 "apify/instagram-scraper",
13 |                 "apify/rag-web-browser",
14 |                 "lukaskrivka/google-maps-with-contact-details"
15 |             ]
16 |         },
17 |         "enableActorAutoLoading": {
18 |             "title": "Enable automatic loading of Actors based on context and use-case (experimental, check if it is supported by your client) (deprecated, use enableAddingActors instead)",
19 |             "type": "boolean",
20 |             "description": "When enabled, the server can dynamically add Actors as tools based on user requests and context. \n\nNote: MCP client must support notification on tool updates. To try it, you can use the [Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client). This is an experimental feature and may require client-specific support.",
21 |             "default": false,
22 |             "editor": "hidden"
23 |         },
24 |         "enableAddingActors": {
25 |             "title": "Enable adding Actors based on context and use-case (experimental, check if it is supported by your client)",
26 |             "type": "boolean",
27 |             "description": "When enabled, the server can dynamically add Actors as tools based on user requests and context. \n\nNote: MCP client must support notification on tool updates. To try it, you can use the [Tester MCP Client](https://apify.com/jiri.spilka/tester-mcp-client). This is an experimental feature and may require client-specific support.",
28 |             "default": true
29 |         },
30 |         "maxActorMemoryBytes": {
31 |             "title": "Limit the maximum memory used by an Actor",
32 |             "type": "integer",
33 |             "description": "Limit the maximum memory used by an Actor in bytes. This is an important setting for Free plan users to avoid exceeding the memory limit.",
34 |             "prefill": 4096,
35 |             "default": 4096
36 |         },
37 |         "debugActor": {
38 |             "title": "Debug Actor",
39 |             "type": "string",
40 |             "description": "Specify the name of the Actor that will be used for debugging in normal mode",
41 |             "editor": "textfield",
42 |             "prefill": "apify/rag-web-browser",
43 |             "sectionCaption": "Debugging settings (normal mode)"
44 |         },
45 |         "debugActorInput": {
46 |             "title": "Debug Actor input",
47 |             "type": "object",
48 |             "description": "Specify the input for the Actor that will be used for debugging in normal mode",
49 |             "editor": "json",
50 |             "prefill": {
51 |                 "query": "hello world"
52 |             }
53 |         }
54 |     }
55 | }
56 | 


--------------------------------------------------------------------------------
/src/tools/helpers.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { HelperTools } from '../const.js';
 5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
 6 | import { compileSchema } from '../utils/ajv.js';
 7 | 
 8 | export const addToolArgsSchema = z.object({
 9 |     actor: z.string()
10 |         .min(1)
11 |         .describe(`Actor ID or full name in the format "username/name", e.g., "apify/rag-web-browser".`),
12 | });
/**
 * Internal helper tool that registers an Actor (or MCP server) as a callable tool on the
 * running server. It never runs the Actor itself.
 *
 * The call handler resolves to a single text message describing one of three outcomes:
 * the Actor was already listed, the Actor could not be loaded, or the Actor was added
 * (in which case a `notifications/tools/list_changed` notification is sent first).
 */
export const addTool: ToolEntry = {
    type: 'internal',
    name: HelperTools.ACTOR_ADD,
    description: `Add an Actor or MCP server to the Apify MCP Server as an available tool.
This does not execute the Actor; it only registers it so it can be called later.

You can first discover Actors using the ${HelperTools.STORE_SEARCH} tool, then add the selected Actor as a tool.

USAGE:
- Use when a user has chosen an Actor to work with and you need to make it available as a callable tool.

USAGE EXAMPLES:
- user_input: Add apify/rag-web-browser as a tool
- user_input: Add apify/instagram-scraper as a tool`,
    inputSchema: z.toJSONSchema(addToolArgsSchema) as ToolInputSchema,
    ajvValidate: compileSchema(z.toJSONSchema(addToolArgsSchema)),
    annotations: {
        title: 'Add tool',
        openWorldHint: true,
    },
    // TODO: I don't like that we are passing apifyMcpServer and mcpServer to the tool
    call: async (toolArgs: InternalToolArgs) => {
        const { apifyMcpServer, apifyToken, args, extra: { sendNotification } } = toolArgs;
        const { actor } = addToolArgsSchema.parse(args);

        let text: string;
        if (apifyMcpServer.listAllToolNames().includes(actor)) {
            text = `Actor ${actor} is already available. No new tools were added.`;
        } else {
            const apifyClient = new ApifyClient({ token: apifyToken });
            const addedTools = await apifyMcpServer.loadActorsAsTools([actor], apifyClient);

            if (addedTools.length === 0) {
                // loadActorsAsTools returns an empty array instead of throwing for an unknown Actor,
                // so report "not found" rather than claiming that something was added.
                text = `Actor ${actor} not found, no tools were added.`;
            } else {
                await sendNotification({ method: 'notifications/tools/list_changed' });
                const toolNames = addedTools.map((tool: ToolEntry) => String(tool.name)).join(', ');
                text = `Actor ${actor} has been added. Newly available tools: ${toolNames}.`;
            }
        }

        return { content: [{ type: 'text', text }] };
    },
} as const;
76 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "@apify/actors-mcp-server",
  3 |   "version": "0.6.5",
  4 |   "type": "module",
  5 |   "description": "Apify MCP Server",
  6 |   "mcpName": "com.apify/apify-mcp-server",
  7 |   "engines": {
  8 |     "node": ">=20.0.0"
  9 |   },
 10 |   "main": "dist/index.js",
 11 |   "exports": {
 12 |     ".": "./dist/index.js",
 13 |     "./internals": "./dist/index-internals.js",
 14 |     "./internals.js": "./dist/index-internals.js",
 15 |     "./manifest.json": "./manifest.json"
 16 |   },
 17 |   "bin": {
 18 |     "actors-mcp-server": "./dist/stdio.js"
 19 |   },
 20 |   "files": [
 21 |     "dist",
 22 |     "LICENSE",
 23 |     "package.json",
 24 |     "server.json",
 25 |     "manifest.json"
 26 |   ],
 27 |   "repository": {
 28 |     "type": "git",
 29 |     "url": "https://github.com/apify/apify-mcp-server.git"
 30 |   },
 31 |   "bugs": {
 32 |     "url": "https://github.com/apify/apify-mcp-server/issues"
 33 |   },
 34 |   "homepage": "https://mcp.apify.com",
 35 |   "keywords": [
 36 |     "apify",
 37 |     "mcp",
 38 |     "server",
 39 |     "actors",
 40 |     "model context protocol"
 41 |   ],
 42 |   "dependencies": {
 43 |     "@apify/datastructures": "^2.0.3",
 44 |     "@apify/log": "^2.5.16",
 45 |     "@modelcontextprotocol/sdk": "^1.24.3",
 46 |     "@segment/analytics-node": "^2.3.0",
 47 |     "@types/cheerio": "^0.22.35",
 48 |     "@types/turndown": "^5.0.5",
 49 |     "ajv": "^8.17.1",
 50 |     "algoliasearch": "^5.31.0",
 51 |     "apify": "^3.4.2",
 52 |     "apify-client": "^2.12.6",
 53 |     "cheerio": "^1.1.2",
 54 |     "dotenv": "^16.4.7",
 55 |     "express": "^4.21.2",
 56 |     "mcp-client-capabilities": "^0.0.5",
 57 |     "to-json-schema": "^0.2.5",
 58 |     "turndown": "^7.2.0",
 59 |     "yargs": "^17.7.2",
 60 |     "zod": "^4.1.13"
 61 |   },
 62 |   "devDependencies": {
 63 |     "@ai-sdk/openai": "^2.0.52",
 64 |     "@apify/eslint-config": "^1.1.0",
 65 |     "@apify/tsconfig": "^0.1.0",
 66 |     "@arizeai/phoenix-client": "^4.2.0",
 67 |     "@arizeai/phoenix-evals": "^0.2.2",
 68 |     "@types/express": "^4.0.0",
 69 |     "@types/to-json-schema": "^0.2.4",
 70 |     "@types/yargs": "^17.0.33",
 71 |     "@types/yargs-parser": "^21.0.3",
 72 |     "eslint": "^9.19.0",
 73 |     "eventsource": "^3.0.2",
 74 |     "openai": "^6.10.0",
 75 |     "tsx": "^4.20.6",
 76 |     "typescript": "^5.3.3",
 77 |     "typescript-eslint": "^8.23.0",
 78 |     "vitest": "^3.0.8"
 79 |   },
 80 |   "scripts": {
 81 |     "start": "npm run start:dev",
 82 |     "start:prod": "node dist/main.js",
 83 |     "start:dev": "tsx src/main.ts",
 84 |     "start:standby": "APIFY_META_ORIGIN=\"STANDBY\" npm run start:dev",
 85 |     "lint": "eslint .",
 86 |     "lint:fix": "eslint . --fix",
 87 |     "check": "npm run type-check && npm run lint:fix",
 88 |     "build": "tsc -b src",
 89 |     "build:watch": "tsc -b src -w",
 90 |     "type-check": "tsc --noEmit",
 91 |     "test": "npm run test:unit",
 92 |     "test:unit": "vitest run tests/unit",
 93 |     "test:integration": "npm run build && vitest run tests/integration",
 94 |     "clean": "tsc -b src --clean",
 95 |     "evals:create-dataset": "tsx evals/create-dataset.ts",
 96 |     "evals:run": "tsx evals/run-evaluation.ts"
 97 |   },
 98 |   "author": "Apify",
 99 |   "license": "MIT"
100 | }
101 | 


--------------------------------------------------------------------------------
/src/tools/index.ts:
--------------------------------------------------------------------------------
 1 | // Import specific tools that are being used
 2 | import type { ToolCategory } from '../types.js';
 3 | import { getExpectedToolsByCategories } from '../utils/tools.js';
 4 | import { callActor, callActorGetDataset, getActorsAsTools } from './actor.js';
 5 | import { getDataset, getDatasetItems, getDatasetSchema } from './dataset.js';
 6 | import { getUserDatasetsList } from './dataset_collection.js';
 7 | import { fetchActorDetailsTool } from './fetch-actor-details.js';
 8 | import { fetchApifyDocsTool } from './fetch-apify-docs.js';
 9 | import { getActorOutput } from './get-actor-output.js';
10 | import { getHtmlSkeleton } from './get-html-skeleton.js';
11 | import { addTool } from './helpers.js';
12 | import { getKeyValueStore, getKeyValueStoreKeys, getKeyValueStoreRecord } from './key_value_store.js';
13 | import { getUserKeyValueStoresList } from './key_value_store_collection.js';
14 | import { getActorRun, getActorRunLog } from './run.js';
15 | import { getUserRunsList } from './run_collection.js';
16 | import { searchApifyDocsTool } from './search-apify-docs.js';
17 | import { searchActors } from './store_collection.js';
18 | 
/** Names of the tools that can be used without authentication (currently the two docs tools). */
export const unauthEnabledTools: string[] = [searchApifyDocsTool.name, fetchApifyDocsTool.name];
25 | 
/**
 * Internal tools grouped by category name.
 * Key order is kept as-is because code that walks `Object.entries(toolCategories)`
 * (e.g. buildUnauthEnabledToolCategories) yields categories in this order.
 */
export const toolCategories = {
    experimental: [addTool],
    actors: [fetchActorDetailsTool, searchActors, callActor],
    docs: [searchApifyDocsTool, fetchApifyDocsTool],
    runs: [getActorRun, getUserRunsList, getActorRunLog],
    storage: [
        // datasets
        getDataset,
        getDatasetItems,
        getDatasetSchema,
        getActorOutput,
        // key-value stores
        getKeyValueStore,
        getKeyValueStoreKeys,
        getKeyValueStoreRecord,
        // per-user collections
        getUserDatasetsList,
        getUserKeyValueStoresList,
    ],
    dev: [getHtmlSkeleton],
};
59 | 
/** Categories used to build `defaultTools`. */
export const toolCategoriesEnabledByDefault: ToolCategory[] = ['actors', 'docs'];
64 | 
/**
 * Collects the tool categories that are usable without authentication.
 *
 * A category qualifies only when none of its tools is missing from `unauthEnabledTools`.
 * Categories are returned in the key order of `toolCategories`.
 */
function buildUnauthEnabledToolCategories(): ToolCategory[] {
    const allowedNames = new Set(unauthEnabledTools);
    const categoryEntries = Object.entries(toolCategories) as [ToolCategory, typeof toolCategories[ToolCategory]][];
    const openCategories: ToolCategory[] = [];

    for (const [category, tools] of categoryEntries) {
        const needsAuth = tools.some((tool) => !allowedNames.has(tool.name));
        if (!needsAuth) {
            openCategories.push(category);
        }
    }

    return openCategories;
}
79 | 
/** Categories whose tools are all usable without authentication; computed once at module load. */
export const unauthEnabledToolCategories = buildUnauthEnabledToolCategories();

/** Tools resolved from the categories listed in `toolCategoriesEnabledByDefault`. */
export const defaultTools = getExpectedToolsByCategories(toolCategoriesEnabledByDefault);

// Re-export only the Actor helpers that are being used.
export { callActorGetDataset, getActorsAsTools };
89 | 


--------------------------------------------------------------------------------
/src/input.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Actor input processing.
 3 |  *
 4 |  * Normalizes raw inputs (CLI/env/HTTP) into a consistent `Input` shape.
 5 |  * No tool-loading is done here; we only canonicalize values and preserve
 6 |  * intent via `undefined` (use defaults later) vs empty (explicitly none).
 7 |  */
 8 | import log from '@apify/log';
 9 | 
10 | import type { Input, ToolSelector } from './types.js';
11 | 
12 | // Helpers
/**
 * Coerces a loosely typed flag into a boolean.
 *
 * - booleans are returned unchanged;
 * - strings are `true` only when they equal "true" case-insensitively, any other string is `false`;
 * - everything else (including `undefined` and `null`) yields `defaultValue`.
 *
 * @param value - Raw value, e.g. from CLI, env or HTTP input.
 * @param defaultValue - Result used when `value` is neither a boolean nor a string.
 */
export function toBoolean(value: unknown, defaultValue: boolean): boolean {
    switch (typeof value) {
        case 'boolean':
            return value;
        case 'string':
            return value.toLowerCase() === 'true';
        default:
            return defaultValue;
    }
}
20 | 
/**
 * Normalizes a list that may arrive as a comma-separated string or as an array.
 *
 * - `undefined` (or a `null` slipping in from untyped JSON/JS callers) returns `undefined`;
 *   previously `null` was stringified and produced the bogus entry `['null']`.
 * - An array has each element stringified and trimmed.
 * - A string is split on commas and each part is trimmed.
 * - Empty entries are dropped, so `''` and `'  '` return `[]`.
 *
 * @param value - Raw list value.
 * @returns Trimmed, non-empty entries, or `undefined` when no value was given.
 */
export function normalizeList(value: string | unknown[] | undefined): string[] | undefined {
    if (value === undefined || value === null) return undefined;

    const rawParts: unknown[] = Array.isArray(value) ? value : String(value).split(',');
    return rawParts
        .map((part) => String(part).trim())
        .filter((part) => part !== '');
}
29 | 
/**
 * Turns raw user input into the canonical `Input` shape.
 *
 * What it does:
 * - `actors` and `tools` are normalized with `normalizeList` into trimmed arrays ('' → []).
 * - `enableAddingActors` is normalized to a boolean (default `false`); the deprecated
 *   `enableActorAutoLoading` is used, with a warning, only when the new flag is not provided.
 * - A non-empty `actors` list is appended to `tools`, and `actors` is then cleared, so the
 *   selection lives in one place.
 *
 * Semantics passed on to the loader: `undefined` → use defaults; `[]` → explicitly none.
 *
 * @param originalInput - Possibly partial input; its other properties are copied through unchanged.
 */
export function processInput(originalInput: Partial<Input>): Input {
    const actors = normalizeList(originalInput.actors);
    const actorList = actors ?? [];

    // Fall back to the legacy flag only when the new one is missing and the legacy one is present.
    const useLegacyFlag = originalInput.enableAddingActors === undefined
        && originalInput.enableActorAutoLoading !== undefined;
    if (useLegacyFlag) {
        log.warning('enableActorAutoLoading is deprecated, use enableAddingActors instead');
    }
    const enableAddingActors = toBoolean(
        useLegacyFlag ? originalInput.enableActorAutoLoading : originalInput.enableAddingActors,
        false,
    );

    let tools = normalizeList(originalInput.tools as string | string[] | undefined) as unknown as ToolSelector[] | undefined;

    // NOTE (future): Actor names contain '/', unlike internal tool names or categories. We could use that to differentiate between the two.
    const actorsMerged = actorList.length > 0;
    if (actorsMerged) {
        // With no tools given, the selection becomes just the actors; otherwise actors go last.
        tools = [...(tools ?? []), ...actorList] as ToolSelector[];
    }

    return {
        ...originalInput,
        // Once merged into `tools`, `actors` is dropped; an undefined or empty list is passed through.
        actors: actorsMerged ? undefined : actors,
        enableAddingActors,
        tools,
    };
}
79 | 


--------------------------------------------------------------------------------
/src/tools/fetch-actor-details.ts:
--------------------------------------------------------------------------------
 1 | import { z } from 'zod';
 2 | 
 3 | import { ApifyClient } from '../apify-client.js';
 4 | import { HelperTools, TOOL_STATUS } from '../const.js';
 5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
 6 | import { fetchActorDetails } from '../utils/actor-details.js';
 7 | import { compileSchema } from '../utils/ajv.js';
 8 | import { buildMCPResponse } from '../utils/mcp.js';
 9 | import { actorDetailsOutputSchema } from './structured-output-schemas.js';
10 | 
/** Zod schema for the tool arguments: a single, non-empty `actor` string. */
const fetchActorDetailsToolArgsSchema = z.object({
    actor: z
        .string()
        .min(1)
        .describe('Actor ID or full name in the format "username/name", e.g., "apify/rag-web-browser".'),
});
16 | 
/**
 * Internal tool that returns an Actor's card, README and input schema.
 *
 * The call handler:
 * - parses the arguments and fetches the details with a client built from the caller's token;
 * - returns a soft-fail error response when no details are found;
 * - otherwise returns the Actor card and README as text blocks, followed by the input schema
 *   only when it declares at least one property, plus the same data as structured content.
 */
export const fetchActorDetailsTool: ToolEntry = {
    type: 'internal',
    name: HelperTools.ACTOR_GET_DETAILS,
    description: `Get detailed information about an Actor by its ID or full name (format: "username/name", e.g., "apify/rag-web-browser").
This returns the Actor's title, description, URL, README (documentation), input schema, pricing/usage information, and basic stats.
Present the information in a user-friendly Actor card.

USAGE:
- Use when a user asks about an Actor’s details, input schema, README, or how to use it.

USAGE EXAMPLES:
- user_input: How to use apify/rag-web-browser
- user_input: What is the input schema for apify/rag-web-browser?
- user_input: What is the pricing for apify/instagram-scraper?`,
    inputSchema: z.toJSONSchema(fetchActorDetailsToolArgsSchema) as ToolInputSchema,
    outputSchema: actorDetailsOutputSchema,
    ajvValidate: compileSchema(z.toJSONSchema(fetchActorDetailsToolArgsSchema)),
    annotations: {
        title: 'Fetch Actor details',
        readOnlyHint: true,
        openWorldHint: false,
    },
    call: async (toolArgs: InternalToolArgs) => {
        const { args, apifyToken } = toolArgs;
        const parsed = fetchActorDetailsToolArgsSchema.parse(args);
        const apifyClient = new ApifyClient({ token: apifyToken });
        const details = await fetchActorDetails(apifyClient, parsed.actor);
        if (!details) {
            return buildMCPResponse({
                texts: [`Actor information for '${parsed.actor}' was not found.
Please verify Actor ID or name format and ensure that the Actor exists.
You can search for available Actors using the tool: ${HelperTools.STORE_SEARCH}.`],
                isError: true,
                toolStatus: TOOL_STATUS.SOFT_FAIL,
            });
        }

        const actorUrl = `https://apify.com/${details.actorInfo.username}/${details.actorInfo.name}`;
        // Add link to README title. The result is kept in a local variable so the fetched
        // `details` object is not modified (NOTE(review): if fetchActorDetails ever returns a
        // shared or cached object, mutating it would prepend the link again on every call).
        const readme = details.readme.replace(/^# /, `# [README](${actorUrl}/readme): `);

        const texts = [
            `# Actor information\n${details.actorCard}`,
            `${readme}`,
        ];

        // Include the input schema only if it declares at least one property.
        // Both conditions must hold: the previous `||` threw on a missing `properties`
        // and still emitted the block for an empty `properties` object.
        const { properties } = details.inputSchema;
        if (properties && Object.keys(properties).length !== 0) {
            texts.push(`# [Input schema](${actorUrl}/input)\n\`\`\`json\n${JSON.stringify(details.inputSchema)}\n\`\`\``);
        }

        // The Actor card, README and (optional) input schema are separate text blocks,
        // which allows better formatting in the final output.
        const structuredContent = {
            actorInfo: details.actorCardStructured,
            readme,
            inputSchema: details.inputSchema,
        };
        return buildMCPResponse({ texts, structuredContent });
    },
} as const;
77 | 


--------------------------------------------------------------------------------
/src/utils/schema-generation.ts:
--------------------------------------------------------------------------------
  1 | import toJsonSchema from 'to-json-schema';
  2 | 
/**
 * Minimal JSON Schema typings for array/object schemas used in generateSchemaFromItems
 */

/**
 * A single schema node. `properties` and `items` are optional and recursive,
 * so a node can describe a nested object or a nested array.
 */
export type JsonSchemaProperty = {
    type: 'string' | 'integer' | 'number' | 'boolean' | 'object' | 'array' | 'null';
    properties?: Record<string, JsonSchemaProperty>;
    items?: JsonSchemaProperty;
};

/** An object schema: `type` is fixed to 'object' and `properties` is required. */
export type JsonSchemaObject = {
    type: 'object';
    properties: Record<string, JsonSchemaProperty>;
};

/** An array schema: `type` is fixed to 'array' and `items` describes the elements. */
export type JsonSchemaArray = {
    type: 'array';
    items: JsonSchemaObject | JsonSchemaProperty;
};
 21 | 
/**
 * Options for schema generation (see generateSchemaFromItems).
 */
export type SchemaGenerationOptions = {
    /** Maximum number of items to use for schema generation. Default is 5. */
    limit?: number;
    /**
     * If true, each item is passed through removeEmptyArrays before the schema is generated,
     * so object keys whose value is an empty array are left out. Default is true.
     */
    clean?: boolean;
    /** Strategy for handling arrays. "first" uses first item as template, "all" merges all items. Default is "all". */
    arrayMode?: 'first' | 'all';
};
 33 | 
 34 | /**
 35 |  * Function to recursively remove empty arrays from an object
 36 |  */
 37 | export function removeEmptyArrays(obj: unknown): unknown {
 38 |     if (Array.isArray(obj)) {
 39 |         // If the item is an array, recursively call removeEmptyArrays on each element.
 40 |         return obj.map((item) => removeEmptyArrays(item));
 41 |     }
 42 | 
 43 |     if (typeof obj !== 'object' || obj === null) {
 44 |         // Return primitives and null values as is.
 45 |         return obj;
 46 |     }
 47 | 
 48 |     // Use reduce to build a new object, excluding keys with empty arrays.
 49 |     return Object.entries(obj).reduce((acc, [key, value]) => {
 50 |         const processedValue = removeEmptyArrays(value);
 51 | 
 52 |         // Exclude the key if the processed value is an empty array.
 53 |         if (Array.isArray(processedValue) && processedValue.length === 0) {
 54 |             return acc;
 55 |         }
 56 | 
 57 |         acc[key] = processedValue;
 58 |         return acc;
 59 |     }, {} as Record<string, unknown>);
 60 | }
 61 | 
 62 | // TODO: write unit tests for this.
 63 | /**
 64 |  * Generates a JSON schema from dataset items with configurable options
 65 |  *
 66 |  * @param datasetItems - Array of dataset items to generate schema from
 67 |  * @param options - Configuration options for schema generation
 68 |  * @returns JSON schema object or null if generation fails
 69 |  */
 70 | export function generateSchemaFromItems(
 71 |     datasetItems: unknown[],
 72 |     options: SchemaGenerationOptions = {},
 73 | ): JsonSchemaArray | null {
 74 |     const {
 75 |         limit = 5,
 76 |         clean = true,
 77 |         arrayMode = 'all',
 78 |     } = options;
 79 | 
 80 |     // Limit the number of items used for schema generation
 81 |     const itemsToUse = datasetItems.slice(0, limit);
 82 | 
 83 |     if (itemsToUse.length === 0) {
 84 |         return null;
 85 |     }
 86 | 
 87 |     // Clean the dataset items by removing empty arrays if requested
 88 |     const processedItems = clean
 89 |         ? itemsToUse.map((item) => removeEmptyArrays(item))
 90 |         : itemsToUse;
 91 | 
 92 |     // Try to generate schema with full options first
 93 |     try {
 94 |         return toJsonSchema(processedItems, {
 95 |             arrays: { mode: arrayMode },
 96 |         }) as JsonSchemaArray;
 97 |     } catch { /* ignore */ }
 98 | 
 99 |     try {
100 |         return toJsonSchema(processedItems, {
101 |             arrays: { mode: 'first' },
102 |         }) as JsonSchemaArray;
103 |     } catch { /* ignore */ }
104 | 
105 |     // If all attempts fail, return null
106 |     return null;
107 | }
108 | 


--------------------------------------------------------------------------------
/src/utils/progress.ts:
--------------------------------------------------------------------------------
  1 | import type { ProgressNotification } from '@modelcontextprotocol/sdk/types.js';
  2 | 
  3 | import type { ApifyClient } from '../apify-client.js';
  4 | import { PROGRESS_NOTIFICATION_INTERVAL_MS } from '../const.js';
  5 | 
  6 | export class ProgressTracker {
  7 |     private progressToken: string | number;
  8 |     private sendNotification: (notification: ProgressNotification) => Promise<void>;
  9 |     private currentProgress = 0;
 10 |     private intervalId?: NodeJS.Timeout;
 11 |     private taskId?: string;
 12 | 
 13 |     constructor(
 14 |         progressToken: string | number,
 15 |         sendNotification: (notification: ProgressNotification) => Promise<void>,
 16 |         taskId?: string,
 17 |     ) {
 18 |         this.progressToken = progressToken;
 19 |         this.sendNotification = sendNotification;
 20 |         this.taskId = taskId;
 21 |     }
 22 | 
 23 |     async updateProgress(message?: string): Promise<void> {
 24 |         this.currentProgress += 1;
 25 | 
 26 |         try {
 27 |             const notification: ProgressNotification = {
 28 |                 method: 'notifications/progress' as const,
 29 |                 params: {
 30 |                     progressToken: this.progressToken,
 31 |                     progress: this.currentProgress,
 32 |                     ...(message && { message }),
 33 |                 },
 34 |                 // Per MCP spec: progress notifications during task execution should include related-task metadata
 35 |                 ...(this.taskId && {
 36 |                     _meta: {
 37 |                         'io.modelcontextprotocol/related-task': {
 38 |                             taskId: this.taskId,
 39 |                         },
 40 |                     },
 41 |                 }),
 42 |             };
 43 | 
 44 |             await this.sendNotification(notification);
 45 |         } catch {
 46 |             // Silent fail - don't break execution
 47 |         }
 48 |     }
 49 | 
 50 |     startActorRunUpdates(runId: string, apifyClient: ApifyClient, actorName: string): void {
 51 |         this.stop();
 52 |         let lastStatus = '';
 53 |         let lastStatusMessage = '';
 54 | 
 55 |         this.intervalId = setInterval(async () => {
 56 |             try {
 57 |                 const run = await apifyClient.run(runId).get();
 58 |                 if (!run) return;
 59 | 
 60 |                 const { status, statusMessage } = run;
 61 | 
 62 |                 // Only send notification if status or statusMessage changed
 63 |                 if (status !== lastStatus || statusMessage !== lastStatusMessage) {
 64 |                     lastStatus = status;
 65 |                     lastStatusMessage = statusMessage || '';
 66 | 
 67 |                     const message = statusMessage
 68 |                         ? `${actorName}: ${statusMessage}`
 69 |                         : `${actorName}: ${status}`;
 70 | 
 71 |                     await this.updateProgress(message);
 72 | 
 73 |                     // Stop polling if Actor finished
 74 |                     if (status === 'SUCCEEDED' || status === 'FAILED' || status === 'ABORTED' || status === 'TIMED-OUT') {
 75 |                         this.stop();
 76 |                     }
 77 |                 }
 78 |             } catch {
 79 |                 // Silent fail - continue polling
 80 |             }
 81 |         }, PROGRESS_NOTIFICATION_INTERVAL_MS);
 82 |     }
 83 | 
 84 |     stop(): void {
 85 |         if (this.intervalId) {
 86 |             clearInterval(this.intervalId);
 87 |             this.intervalId = undefined;
 88 |         }
 89 |     }
 90 | }
 91 | 
 92 | export function createProgressTracker(
 93 |     progressToken: string | number | undefined,
 94 |     sendNotification: ((notification: ProgressNotification) => Promise<void>) | undefined,
 95 |     taskId?: string,
 96 | ): ProgressTracker | null {
 97 |     if (!progressToken || !sendNotification) {
 98 |         return null;
 99 |     }
100 | 
101 |     return new ProgressTracker(progressToken, sendNotification, taskId);
102 | }
103 | 


--------------------------------------------------------------------------------
/tests/unit/utils.html.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest';
 2 | 
 3 | import { stripHtml } from '../../src/utils/html.js';
 4 | 
 5 | describe('stripHtml', () => {
 6 |     it('should remove unwanted attributes and keep allowed ones', () => {
 7 |         const input = '<div class="test" id="myDiv" style="color:red" data-value="123" href="http://example.com">Content</div>';
 8 |         const expected = '<div class="test" id="myDiv" data-value="123">Content</div>';
 9 |         expect(stripHtml(input)).toBe(expected);
10 |     });
11 | 
12 |     it('should remove all attributes except allowed ones', () => {
13 |         const input = '<a href="http://example.com" onclick="alert()" title="Link" name="test">Link</a>';
14 |         const expected = '<a href="http://example.com" title="Link" name="test">Link</a>';
15 |         expect(stripHtml(input)).toBe(expected);
16 |     });
17 | 
18 |     it('should keep data-* attributes', () => {
19 |         const input = '<div data-toggle="modal" data-id="123" class="btn">Button</div>';
20 |         const expected = '<div data-toggle="modal" data-id="123" class="btn">Button</div>';
21 |         expect(stripHtml(input)).toBe(expected);
22 |     });
23 | 
24 |     it('should remove style, script, and other unwanted tags', () => {
25 |         const input = '<html><head><style>body { color: red; }</style></head><body><script>alert("test");</script>Content</body></html>';
26 |         const expected = '<html><head></head><body>Content</body></html>';
27 |         expect(stripHtml(input)).toBe(expected);
28 |     });
29 | 
30 |     it('should remove noscript, iframe, svg, canvas, math tags', () => {
31 |         const input = '<div><noscript>JS disabled</noscript><iframe src="http://example.com"></iframe><svg><circle></circle></svg><canvas></canvas><math></math><p>Text</p></div>';
32 |         const expected = '<div><p>Text</p></div>';
33 |         expect(stripHtml(input)).toBe(expected);
34 |     });
35 | 
36 |     it('should remove HTML comments', () => {
37 |         const input = '<div><!-- This is a comment --><p>Content</p></div>';
38 |         const expected = '<div><p>Content</p></div>';
39 |         expect(stripHtml(input)).toBe(expected);
40 |     });
41 | 
42 |     it('should remove base64 encoded images', () => {
43 |         const input = '<div><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" '
44 |             + 'alt="test"><p>Text</p></div>';
45 |         const expected = '<div><p>Text</p></div>';
46 |         expect(stripHtml(input)).toBe(expected);
47 |     });
48 | 
49 |     it('should keep regular images with http src', () => {
50 |         const input = '<img src="http://example.com/image.png" alt="Image" class="img">';
51 |         const expected = '<img src="http://example.com/image.png" alt="Image" class="img">';
52 |         expect(stripHtml(input)).toBe(expected);
53 |     });
54 | 
55 |     it('should collapse multiple spaces and remove spaces between tags', () => {
56 |         const input = '<div>  <p>   Text   </p>  </div>';
57 |         const expected = '<div><p> Text </p></div>';
58 |         expect(stripHtml(input)).toBe(expected);
59 |     });
60 | 
61 |     it('should trim the result', () => {
62 |         const input = '  <div>Content</div>  ';
63 |         const expected = '<div>Content</div>';
64 |         expect(stripHtml(input)).toBe(expected);
65 |     });
66 | 
67 |     it('should handle empty string', () => {
68 |         expect(stripHtml('')).toBe('');
69 |     });
70 | 
71 |     it('should handle plain text', () => {
72 |         const input = 'Just plain text';
73 |         expect(stripHtml(input)).toBe('Just plain text');
74 |     });
75 | 
76 |     it('should handle malformed HTML', () => {
77 |         const input = '<div><p>Unclosed tag';
78 |         const expected = '<div><p>Unclosed tag</p></div>';
79 |         expect(stripHtml(input)).toBe(expected);
80 |     });
81 | 
82 |     it('should handle nested elements with mixed attributes', () => {
83 |         const input = '<div class="container" style="margin:0"><a href="http://test.com" onclick="return false" data-type="link">Link</a></div>';
84 |         const expected = '<div class="container"><a href="http://test.com" data-type="link">Link</a></div>';
85 |         expect(stripHtml(input)).toBe(expected);
86 |     });
87 | });
88 | 


--------------------------------------------------------------------------------
/src/utils/generic.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Parses a comma-separated string into an array of trimmed strings.
  3 |  * Empty strings are filtered out after trimming.
  4 |  *
  5 |  * @param input - The comma-separated string to parse. If undefined, returns an empty array.
  6 |  * @returns An array of trimmed, non-empty strings.
  7 |  * @example
  8 |  * parseCommaSeparatedList("a, b, c"); // ["a", "b", "c"]
  9 |  * parseCommaSeparatedList("a, , b"); // ["a", "b"]
 10 |  */
 11 | export function parseCommaSeparatedList(input?: string): string[] {
 12 |     if (!input) {
 13 |         return [];
 14 |     }
 15 |     return input.split(',').map((s) => s.trim()).filter((s) => s.length > 0);
 16 | }
 17 | 
 18 | /**
 19 |  * Parses a query parameter that can be either a string or an array of strings.
 20 |  * Handles comma-separated values in strings and filters out empty values.
 21 |  *
 22 |  * @param param - A query parameter that can be a string, array of strings, or undefined
 23 |  * @returns An array of trimmed, non-empty strings
 24 |  * @example
 25 |  * parseQueryParamList("a,b,c"); // ["a", "b", "c"]
 26 |  * parseQueryParamList(["a", "b"]); // ["a", "b"]
 27 |  * parseQueryParamList(undefined); // []
 28 |  */
 29 | export function parseQueryParamList(param?: string | string[]): string[] {
 30 |     if (!param) {
 31 |         return [];
 32 |     }
 33 |     if (Array.isArray(param)) {
 34 |         return param.flatMap((item) => parseCommaSeparatedList(item));
 35 |     }
 36 |     return parseCommaSeparatedList(param);
 37 | }
 38 | 
 39 | /**
 40 |  * Recursively gets the value in a nested object for each key in the keys array.
 41 |  * Each key can be a dot-separated path (e.g. 'a.b.c').
 42 |  * Returns an object mapping each key to its resolved value (or undefined if not found).
 43 |  *
 44 |  * @example
 45 |  * const obj = { a: { b: { c: 42 } }, nested: { d: 100 } };
 46 |  * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']);
 47 |  * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } }
 48 |  */
 49 | export function getValuesByDotKeys(obj: Record<string, unknown>, keys: string[]): Record<string, unknown> {
 50 |     const result: Record<string, unknown> = {};
 51 |     for (const key of keys) {
 52 |         const path = key.split('.');
 53 |         let current: unknown = obj;
 54 |         for (const segment of path) {
 55 |             if (
 56 |                 current !== null
 57 |                 && typeof current === 'object'
 58 |                 && Object.prototype.hasOwnProperty.call(current, segment)
 59 |             ) {
 60 |                 // Use index signature to avoid 'any' and type errors
 61 |                 current = (current as Record<string, unknown>)[segment];
 62 |             } else {
 63 |                 current = undefined;
 64 |                 break;
 65 |             }
 66 |         }
 67 |         result[key] = current;
 68 |     }
 69 |     return result;
 70 | }
 71 | 
 72 | /**
 73 |  * Validates whether a given string is a well-formed URL.
 74 |  *
 75 |  * Allows only valid HTTP or HTTPS URLs.
 76 |  */
 77 | export function isValidHttpUrl(urlString: string): boolean {
 78 |     if (!urlString.startsWith('http://') && !urlString.startsWith('https://')) {
 79 |         return false;
 80 |     }
 81 |     try {
 82 |         /* eslint-disable no-new */
 83 |         new URL(urlString);
 84 |         return true;
 85 |     } catch {
 86 |         return false;
 87 |     }
 88 | }
 89 | 
 90 | /**
 91 |  * Parses a boolean value from a string, boolean, null, or undefined.
 92 |  * Accepts 'true', '1' as true, 'false', '0' as false.
 93 |  * If value is already a boolean, returns it directly.
 94 |  * Returns undefined if the value is not a recognized boolean string or is null/undefined/empty string.
 95 |  */
 96 | export function parseBooleanFromString(value: string | boolean | undefined | null): boolean | undefined {
 97 |     // If already a boolean, return it directly
 98 |     if (typeof value === 'boolean') {
 99 |         return value;
100 |     }
101 |     // Handle undefined/null
102 |     if (value === undefined || value === null) {
103 |         return undefined;
104 |     }
105 |     // Handle empty string (after trim)
106 |     const normalized = value.toLowerCase().trim();
107 |     if (normalized === '') {
108 |         return undefined;
109 |     }
110 |     if (normalized === 'true' || normalized === '1') {
111 |         return true;
112 |     }
113 |     if (normalized === 'false' || normalized === '0') {
114 |         return false;
115 |     }
116 |     return undefined;
117 | }
118 | 


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * ESLint Configuration
 3 |  *
 4 |  * This configuration follows the apify-core style and uses the shared @apify/eslint-config package.
 5 |  * It follows the shared config as much as possible, only adding project-specific overrides where necessary.
 6 |  *
 7 |  * The shared config provides:
 8 |  * - Import ordering via simple-import-sort/imports (groups: side effects, node:, external, @apify/, @apify-packages/, relative)
 9 |  * - max-len rule (160 chars, ignores URLs and template literals)
10 |  * - TypeScript-specific rules and best practices
11 |  *
12 |  * Project-specific overrides:
13 |  * - import/no-extraneous-dependencies: Adds vitest.config.ts and evals/** patterns
14 |  * - @typescript-eslint/consistent-type-definitions: Prefers 'type' over 'interface' (use interface only for class implementations)
15 |  * - @typescript-eslint/no-unused-vars: Detects unused variables, functions, and parameters (allows _ prefix)
16 |  * - import/no-default-export: Allows default exports in config files
17 |  */
18 | import apifyTypeScriptConfig from '@apify/eslint-config/ts.js';
19 | 
// Directories/files excluded from linting.
// Ignores must be defined first in flat config format, so this object leads the exported array.
const ignoredPaths = {
    ignores: [
        '**/dist', // Build output directory
        '**/.venv', // Python virtual environment (if present)
        'evals/**', // Evaluation scripts directory
    ],
};

// File patterns that are allowed to import devDependencies.
const devDependencyFilePatterns = [
    '**/eslint.config.mjs', // ESLint config files
    '**/vitest.config.ts', // Vitest config files
    '**/*.test.{js,ts,jsx,tsx}', // Test files
    '**/{test,tests}/**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts}', // Test directories
    'evals/**/*.{js,ts,jsx,tsx,mjs,mts,cjs,cts}', // Evaluation scripts
];

// Overrides that apply to every linted file.
const projectOverrides = {
    rules: {
        // Prevent importing devDependencies in production code.
        // This helps catch accidental imports of test/build tools in source code.
        'import/no-extraneous-dependencies': ['error', { devDependencies: devDependencyFilePatterns }],
    },
    languageOptions: {
        // Use ES modules (import/export syntax)
        sourceType: 'module',
        parserOptions: {
            // Use the ESLint-specific tsconfig that includes test files,
            // so TypeScript-aware linting works for all files.
            project: './tsconfig.eslint.json',
        },
    },
};

// TypeScript-specific rules (applied only to .ts/.tsx files).
// These rules require the @typescript-eslint plugin which is included in apifyTypeScriptConfig.
const typeScriptOverrides = {
    files: ['**/*.ts', '**/*.tsx'],
    rules: {
        // Prefer 'type' over 'interface' for flexibility;
        // use 'interface' only when required for class implementations (implements).
        '@typescript-eslint/consistent-type-definitions': ['error', 'type'],
        // Detect unused variables, functions, and parameters to prevent dead code.
        // Names starting with an underscore are treated as intentionally unused.
        '@typescript-eslint/no-unused-vars': [
            'error',
            {
                argsIgnorePattern: '^_', // Function arguments
                destructuredArrayIgnorePattern: '^_', // Destructured array elements
                varsIgnorePattern: '^_', // Variables
            },
        ],
    },
};

// Config files (like this one) typically use default exports, which is acceptable.
const configFileOverrides = {
    files: ['**/eslint.config.mjs', '**/vitest.config.ts'],
    rules: {
        // Allow default exports in config files (standard practice for config files)
        'import/no-default-export': 'off',
    },
};

export default [
    ignoredPaths,
    // Shared Apify TypeScript ESLint configuration:
    // TypeScript-specific rules, import ordering, and other best practices.
    ...apifyTypeScriptConfig,
    projectOverrides,
    typeScriptOverrides,
    configFileOverrides,
];
94 | 


--------------------------------------------------------------------------------
/src/tools/build.ts:
--------------------------------------------------------------------------------
  1 | import type { ApifyClient } from '../apify-client.js';
  2 | import { ACTOR_README_MAX_LENGTH } from '../const.js';
  3 | import type {
  4 |     ActorDefinitionPruned,
  5 |     ActorDefinitionWithDesc,
  6 |     ActorDefinitionWithInfo,
  7 |     SchemaProperties,
  8 | } from '../types.js';
  9 | 
 10 | /**
 11 |  * Get Actor input schema by Actor name.
 12 |  * First, fetch the Actor details to get the default build tag and buildId.
 13 |  * Then, fetch the build details and return actorName, description, and input schema.
 14 |  * @param {string} actorIdOrName - Actor ID or Actor full name.
 15 |  * @param {ApifyClient} apifyClient - The Apify client instance.
 16 |  * @param {number} limit - Truncate the README to this limit.
 17 |  * @returns {Promise<ActorDefinitionWithInfo | null>} - The Actor definition with info or null if not found.
 18 |  */
 19 | export async function getActorDefinition(
 20 |     actorIdOrName: string,
 21 |     apifyClient: ApifyClient,
 22 |     limit: number = ACTOR_README_MAX_LENGTH,
 23 | ): Promise<ActorDefinitionWithInfo | null> {
 24 |     const actorClient = apifyClient.actor(actorIdOrName);
 25 |     try {
 26 |         // Fetch Actor details
 27 |         const actor = await actorClient.get();
 28 |         if (!actor) {
 29 |             return null;
 30 |         }
 31 | 
 32 |         const defaultBuildClient = await actorClient.defaultBuild();
 33 |         const buildDetails = await defaultBuildClient.get();
 34 | 
 35 |         if (buildDetails?.actorDefinition) {
 36 |             const actorDefinitions = buildDetails?.actorDefinition as ActorDefinitionWithDesc;
 37 |             // We set actorDefinition ID to Actor ID
 38 |             actorDefinitions.id = actor.id;
 39 |             actorDefinitions.readme = truncateActorReadme(actorDefinitions.readme || '', limit);
 40 |             actorDefinitions.description = actor.description || '';
 41 |             actorDefinitions.actorFullName = `${actor.username}/${actor.name}`;
 42 |             actorDefinitions.defaultRunOptions = actor.defaultRunOptions;
 43 |             // Pass pictureUrl from actor object (untyped property but present in API response)
 44 |             (actorDefinitions as Record<string, unknown>).pictureUrl = (actor as unknown as Record<string, unknown>).pictureUrl;
 45 |             return {
 46 |                 definition: pruneActorDefinition(actorDefinitions),
 47 |                 info: actor,
 48 |             };
 49 |         }
 50 |         return null;
 51 |     } catch (error) {
 52 |         // Check if it's a "not found" error (404 or 400 status codes)
 53 |         const isNotFound = typeof error === 'object'
 54 |             && error !== null
 55 |             && 'statusCode' in error
 56 |             && (error.statusCode === 404 || error.statusCode === 400);
 57 | 
 58 |         if (isNotFound) {
 59 |             // Return null for not found - caller will log appropriately
 60 |             return null;
 61 |         }
 62 | 
 63 |         // For server errors, throw the original error (preserve error type)
 64 |         // Caller should catch and log
 65 |         throw error;
 66 |     }
 67 | }
 68 | function pruneActorDefinition(response: ActorDefinitionWithDesc): ActorDefinitionPruned {
 69 |     return {
 70 |         id: response.id,
 71 |         actorFullName: response.actorFullName || '',
 72 |         buildTag: response?.buildTag || '',
 73 |         readme: response?.readme || '',
 74 |         input: response?.input && 'type' in response.input && 'properties' in response.input
 75 |             ? {
 76 |                 ...response.input,
 77 |                 type: response.input.type as string,
 78 |                 properties: response.input.properties as Record<string, SchemaProperties>,
 79 |             }
 80 |             : undefined,
 81 |         description: response.description,
 82 |         defaultRunOptions: response.defaultRunOptions,
 83 |         webServerMcpPath: 'webServerMcpPath' in response ? response.webServerMcpPath as string : undefined,
 84 |         pictureUrl: 'pictureUrl' in response ? response.pictureUrl as string | undefined : undefined,
 85 |     };
 86 | }
 87 | /** Prune Actor README if it is too long
 88 |  * If the README is too long
 89 |  * - We keep the README as it is up to the limit.
 90 |  * - After the limit, we keep heading only
 91 |  * - We add a note that the README was truncated because it was too long.
 92 |  */
 93 | function truncateActorReadme(readme: string, limit = ACTOR_README_MAX_LENGTH): string {
 94 |     if (readme.length <= limit) {
 95 |         return readme;
 96 |     }
 97 |     const readmeFirst = readme.slice(0, limit);
 98 |     const readmeRest = readme.slice(limit);
 99 |     const lines = readmeRest.split('\n');
100 |     const prunedReadme = lines.filter((line) => line.startsWith('#'));
101 |     return `${readmeFirst}\n\nREADME was truncated because it was too long. Remaining headers:\n${prunedReadme.join(', ')}`;
102 | }
103 | 


--------------------------------------------------------------------------------
/src/mcp/client.ts:
--------------------------------------------------------------------------------
  1 | import { Client } from '@modelcontextprotocol/sdk/client/index.js';
  2 | import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
  3 | import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
  4 | 
  5 | import log from '@apify/log';
  6 | 
  7 | import { TimeoutError } from '../errors.js';
  8 | import { logHttpError } from '../utils/logging.js';
  9 | import { ACTORIZED_MCP_CONNECTION_TIMEOUT_MSEC } from './const.js';
 10 | import { getMCPServerID } from './utils.js';
 11 | 
 12 | /**
 13 |  * Creates and connects a ModelContextProtocol client.
 14 |  * First tries streamable HTTP transport, then falls back to SSE transport.
 15 |  */
 16 | export async function connectMCPClient(
 17 |     url: string, token: string,
 18 | ): Promise<Client | null> {
 19 |     let client: Client;
 20 |     try {
 21 |         client = await createMCPStreamableClient(url, token);
 22 |         return client;
 23 |     } catch (error) {
 24 |         // If streamable HTTP transport fails on not timeout error, continue with SSE transport
 25 |         if (error instanceof TimeoutError) {
 26 |             log.warning('Connection to MCP server using streamable HTTP transport timed out', { url });
 27 |             return null;
 28 |         }
 29 | 
 30 |         // If streamable HTTP transport fails, fall back to SSE transport
 31 |         log.debug('Streamable HTTP transport failed, falling back to SSE transport', {
 32 |             url,
 33 |         });
 34 |     }
 35 | 
 36 |     try {
 37 |         client = await createMCPSSEClient(url, token);
 38 |         return client;
 39 |     } catch (error) {
 40 |         if (error instanceof TimeoutError) {
 41 |             log.warning('Connection to MCP server using SSE transport timed out', { url });
 42 |             return null;
 43 |         }
 44 |         logHttpError(error, 'Failed to connect to MCP server using SSE transport', { url, cause: error });
 45 |         throw error;
 46 |     }
 47 | }
 48 | 
 49 | async function withTimeout<T>(millis: number, promise: Promise<T>): Promise<T> {
 50 |     let timeoutPid: NodeJS.Timeout;
 51 |     const timeout = new Promise<never>((_resolve, reject) => {
 52 |         timeoutPid = setTimeout(
 53 |             () => reject(new TimeoutError(`Timed out after ${millis} ms.`)),
 54 |             millis,
 55 |         );
 56 |     });
 57 | 
 58 |     return Promise.race([
 59 |         promise,
 60 |         timeout,
 61 |     ]).finally(() => {
 62 |         if (timeoutPid) {
 63 |             clearTimeout(timeoutPid);
 64 |         }
 65 |     });
 66 | }
 67 | 
 68 | /**
 69 |  * Creates and connects a ModelContextProtocol client.
 70 |  */
 71 | async function createMCPSSEClient(
 72 |     url: string, token: string,
 73 | ): Promise<Client> {
 74 |     const transport = new SSEClientTransport(
 75 |         new URL(url),
 76 |         {
 77 |             requestInit: {
 78 |                 headers: {
 79 |                     authorization: `Bearer ${token}`,
 80 |                 },
 81 |             },
 82 |             eventSourceInit: {
 83 |                 // The EventSource package augments EventSourceInit with a "fetch" parameter.
 84 |                 // You can use this to set additional headers on the outgoing request.
 85 |                 // Based on this example: https://github.com/modelcontextprotocol/typescript-sdk/issues/118
 86 |                 async fetch(input: Request | URL | string, init?: RequestInit) {
 87 |                     const headers = new Headers(init?.headers || {});
 88 |                     headers.set('authorization', `Bearer ${token}`);
 89 |                     return fetch(input, { ...init, headers });
 90 |                 },
 91 |                 // We have to cast to "any" to use it, since it's non-standard
 92 |             } as any, // eslint-disable-line @typescript-eslint/no-explicit-any
 93 |         });
 94 | 
 95 |     const client = new Client({
 96 |         name: getMCPServerID(url),
 97 |         version: '1.0.0',
 98 |     });
 99 | 
100 |     await withTimeout(ACTORIZED_MCP_CONNECTION_TIMEOUT_MSEC, client.connect(transport));
101 | 
102 |     return client;
103 | }
104 | 
/**
 * Creates and connects a ModelContextProtocol client using the streamable HTTP transport.
 *
 * @param url - URL of the MCP server.
 * @param token - Token sent as `Bearer` authorization.
 * @returns The connected client.
 * @throws `TimeoutError` when connecting takes longer than `ACTORIZED_MCP_CONNECTION_TIMEOUT_MSEC`,
 *         or whatever error `client.connect` rejects with. The transport is closed before rethrowing.
 */
async function createMCPStreamableClient(
    url: string, token: string,
): Promise<Client> {
    const transport = new StreamableHTTPClientTransport(
        new URL(url),
        {
            requestInit: {
                headers: {
                    authorization: `Bearer ${token}`,
                },
            },
        });

    const client = new Client({
        name: getMCPServerID(url),
        version: '1.0.0',
    });

    try {
        await withTimeout(ACTORIZED_MCP_CONNECTION_TIMEOUT_MSEC, client.connect(transport));
    } catch (error) {
        // Losing the race does not cancel the connect attempt, and the caller falls back to SSE
        // after a failure here, so release this transport instead of leaving its requests pending.
        await transport.close().catch(() => {
            log.debug('Failed to close streamable HTTP transport after unsuccessful connect', { url });
        });
        throw error;
    }

    return client;
}
130 | 


--------------------------------------------------------------------------------
/src/utils/actor.ts:
--------------------------------------------------------------------------------
  1 | import type { ApifyClient } from '../apify-client.js';
  2 | import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js';
  3 | import { mcpServerCache } from '../state.js';
  4 | import { getActorDefinition } from '../tools/build.js';
  5 | import type { ActorDefinitionStorage, DatasetItem } from '../types.js';
  6 | import { getValuesByDotKeys } from './generic.js';
  7 | 
  8 | /**
  9 |  * Resolve and cache the MCP server URL for the given Actor.
 10 |  * - Returns a string URL when the Actor exposes an MCP server
 11 |  * - Returns false when the Actor is not an MCP server
 12 |  * Uses a TTL LRU cache to avoid repeated API calls.
 13 |  */
 14 | export async function getActorMcpUrlCached(
 15 |     actorIdOrName: string,
 16 |     apifyClient: ApifyClient,
 17 | ): Promise<string | false> {
 18 |     const cached = mcpServerCache.get(actorIdOrName);
 19 |     if (cached !== null && cached !== undefined) {
 20 |         return cached as string | false;
 21 |     }
 22 | 
 23 |     try {
 24 |         const actorDefinitionWithInfo = await getActorDefinition(actorIdOrName, apifyClient);
 25 |         const definition = actorDefinitionWithInfo?.definition;
 26 |         const mcpPath = definition && getActorMCPServerPath(definition);
 27 |         if (mcpPath) {
 28 |             const url = await getActorMCPServerURL(definition.id, mcpPath);
 29 |             mcpServerCache.set(actorIdOrName, url);
 30 |             return url;
 31 |         }
 32 | 
 33 |         mcpServerCache.set(actorIdOrName, false);
 34 |         return false;
 35 |     } catch (error) {
 36 |         // Check if it's a "not found" error (404 or 400 status codes)
 37 |         const isNotFound = typeof error === 'object'
 38 |             && error !== null
 39 |             && 'statusCode' in error
 40 |             && (error.statusCode === 404 || error.statusCode === 400);
 41 | 
 42 |         if (isNotFound) {
 43 |             // Actor doesn't exist - cache false and return false
 44 |             mcpServerCache.set(actorIdOrName, false);
 45 |             return false;
 46 |         }
 47 |         // Real server error - don't cache, let it propagate
 48 |         throw error;
 49 |     }
 50 | }
 51 | 
 52 | /**
 53 |  * Returns an array of all field names mentioned in the display.properties
 54 |  * of all views in the given ActorDefinitionStorage object.
 55 |  */
 56 | export function getActorDefinitionStorageFieldNames(storage: ActorDefinitionStorage | object): string[] {
 57 |     const fieldSet = new Set<string>();
 58 |     if ('views' in storage && typeof storage.views === 'object' && storage.views !== null) {
 59 |         for (const view of Object.values(storage.views)) {
 60 |             // Collect from display.properties
 61 |             if (view.display && view.display.properties) {
 62 |                 Object.keys(view.display.properties).forEach((field) => fieldSet.add(field));
 63 |             }
 64 |             // Collect from transformation.fields
 65 |             if (view.transformation && Array.isArray(view.transformation.fields)) {
 66 |                 view.transformation.fields.forEach((field) => {
 67 |                     if (typeof field === 'string') fieldSet.add(field);
 68 |                 });
 69 |             }
 70 |         }
 71 |     }
 72 |     return Array.from(fieldSet);
 73 | }
 74 | 
 75 | /**
 76 |  * Ensures the Actor output items are within the character limit.
 77 |  *
 78 |  * First checks if all items fit into the limit, then tries only the important fields and as a last resort
 79 |  * starts removing items until within the limit. In worst scenario return empty array.
 80 |  *
 81 |  * This is primarily used to ensure the tool output does not exceed the LLM context length or tool output limit.
 82 |  */
 83 | export function ensureOutputWithinCharLimit(items: DatasetItem[], importantFields: string[], charLimit: number): DatasetItem[] {
 84 |     // Check if all items fit into the limit
 85 |     const allItemsString = JSON.stringify(items);
 86 |     if (allItemsString.length <= charLimit) {
 87 |         return items;
 88 |     }
 89 | 
 90 |     /**
 91 |      * Items used for the final fallback - removing items until within the limit.
 92 |      * If important fields are defined, use only those fields for that fallback step.
 93 |      */
 94 |     let sourceItems = items;
 95 |     // Try keeping only the important fields
 96 |     if (importantFields.length > 0) {
 97 |         const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields));
 98 |         const importantItemsString = JSON.stringify(importantItems);
 99 |         if (importantItemsString.length <= charLimit) {
100 |             return importantItems;
101 |         }
102 |         sourceItems = importantItems;
103 |     }
104 | 
105 |     // Start removing items until within the limit
106 |     const result: DatasetItem[] = [];
107 |     for (const item of sourceItems) {
108 |         if (JSON.stringify(result.concat(item)).length > charLimit) {
109 |             break;
110 |         }
111 |         result.push(item);
112 |     }
113 |     return result;
114 | }
115 | 


--------------------------------------------------------------------------------
/res/ALGOLIA.md:
--------------------------------------------------------------------------------
  1 | # Algolia Search Response Analysis
  2 | 
  3 | This document contains insights about Algolia API responses for each documentation source. This information helps understand the data structure returned by Algolia and informs decisions about response processing logic.
  4 | 
  5 | ## Key Findings
  6 | 
  7 | ### URL Handling
  8 | - **All hits always have `url_without_anchor`** ✓
  9 | - No need to skip hits for missing URLs (the check can be simplified or removed)
 10 | - The `url_without_anchor` field is always populated across all documentation sources
 11 | 
 12 | ### Anchor/Fragment Field
 13 | - **Initial finding** (search: "api"): No hits had anchors
 14 | - **Updated finding** (search: "actor"): **80% of Apify hits have anchors** (16/20 hits)
 15 | - **Fragment Distribution**:
 16 |   - Apify source: 80% of results include anchors pointing to specific sections
 17 |   - Same URLs appear multiple times with different anchors (e.g., `/actors` page has 4 different section anchors)
 18 |   - Crawlee sources: No anchors (due to `typeFilter: 'lvl1'` which returns page-level only)
 19 | - **Decision**: Fragments are important for Apify source and should be preserved in results
 20 | 
 21 | ### Content Field
 22 | 
 23 | | Source | Has Content | Count | Note |
 24 | |--------|------------|-------|------|
 25 | | **apify** | ✓ YES | 20/20 (100%) | Always has content provided in full text |
 26 | | **crawlee-js** | ✗ NO | 0/3 (0%) | Content is `null`, must use hierarchy |
 27 | | **crawlee-py** | ✗ NO | 0/20 (0%) | Content is `null`, must use hierarchy |
 28 | 
 29 | ### Hierarchy Field
 30 | - **All hits have hierarchy object** with fields: `lvl0`, `lvl1`, `lvl2`, `lvl3`, `lvl4`, `lvl5`, `lvl6`
 31 | - Most of these fields are `null` in responses
 32 | - Only the first 1-2 levels typically contain values
 33 | - **Apify source**: Has `content` field, so hierarchy is used less
 34 | - **Crawlee sources**: No `content` field, must rely on hierarchy for display
 35 | 
 36 | ## Data Structure Examples
 37 | 
 38 | ### Raw Algolia Response (Apify with fragments)
 39 | ```json
 40 | {
 41 |   "url_without_anchor": "https://docs.apify.com/platform/actors",
 42 |   "anchor": "actors-overview",
 43 |   "content": "Actors are serverless cloud programs that can perform anything...",
 44 |   "type": "content",
 45 |   "hierarchy": { "lvl0": "Platform", "lvl1": "Actors", ... }
 46 | }
 47 | ```
 48 | 
 49 | ### Processed Result (After processAlgoliaResponse)
 50 | ```json
 51 | {
 52 |   "url": "https://docs.apify.com/platform/actors#actors-overview",
 53 |   "content": "Actors are serverless cloud programs that can perform anything..."
 54 | }
 55 | ```
 56 | 
 57 | ### Multiple Sections Same Page
 58 | When searching "actor", the Apify index returns multiple hits from the same page with different anchors:
 59 | ```
 60 | https://docs.apify.com/platform/actors#actors-overview
 61 | https://docs.apify.com/platform/actors#actor-components
 62 | https://docs.apify.com/platform/actors#build-actors
 63 | https://docs.apify.com/platform/actors#running-actors
 64 | ```
 65 | 
 66 | This gives the LLM access to different sections of the same page.
 67 | 
 68 | ### Crawlee (No fragments)
 69 | ```json
 70 | // Raw Algolia Response
 71 | {
 72 |   "url_without_anchor": "https://crawlee.dev/js/api",
 73 |   "anchor": "",
 74 |   "content": null,
 75 |   "type": "lvl1"
 76 | }
 77 | 
 78 | // Processed Result
 79 | {
 80 |   "url": "https://crawlee.dev/js/api"
 81 |   // Note: no content field since Crawlee doesn't provide it
 82 | }
 83 | ```
 84 | 
 85 | ## Simplification & Design Decisions
 86 | 
 87 | ### Fragment Handling Strategy
 88 | **Decision**: Embed fragments directly in returned URLs instead of returning as separate field.
 89 | 
 90 | **Rationale**:
 91 | - Simpler type definition (`ApifyDocsSearchResult` has only `url` and `content`)
 92 | - LLM receives ready-to-use URLs (e.g., `https://docs.apify.com/actors#build-actors`)
 93 | - Fetch tool already handles fragments correctly (splits on `#`)
 94 | - No need for complex logic to reconstruct URL+fragment
 95 | 
 96 | **Implementation**:
 97 | ```typescript
 98 | // Returns:
 99 | { url: "https://docs.apify.com/actors#build-actors", content: "..." }
100 | 
101 | // Instead of:
102 | { url: "https://docs.apify.com/actors", fragment: "build-actors", content: "..." }
103 | ```
104 | 
105 | ### Content Strategy
106 | - **Use Algolia content directly** - Always populated for Apify, never for Crawlee
107 | - **Remove hierarchy fallback** - Simplified approach, no hierarchy-based content synthesis
108 | - **Result**: 
109 |   - Apify search results include both URL (with anchor) and content
110 |   - Crawlee search results include URL only (content is not available)
111 | 
112 | ### Configuration Cleanup
113 | - Removed `supportsFragments` property from DOCS_SOURCES config
114 | - Simplified typeFilter comments (no longer need to explain fragment filtering)
115 | 
116 | ### Code Simplification
117 | **processAlgoliaResponse() went from ~45 lines to ~20 lines:**
118 | - Removed fragment/hierarchy processing logic
119 | - Removed supportsFragments checks
120 | - URL building: `hit.url_without_anchor + (hit.anchor ? '#' + hit.anchor : '')`
121 | - Content: `hit.content` (use as-is if present)
122 | 


--------------------------------------------------------------------------------
/tests/unit/utils.actor.test.ts:
--------------------------------------------------------------------------------
  1 | import { describe, expect, it } from 'vitest';
  2 | 
  3 | import { ensureOutputWithinCharLimit, getActorDefinitionStorageFieldNames } from '../../src/utils/actor.js';
  4 | 
  5 | describe('getActorDefinitionStorageFieldNames', () => {
  6 |     it('should return an array of field names from a single view (display.properties and transformation.fields)', () => {
  7 |         const storage = {
  8 |             views: {
  9 |                 view1: {
 10 |                     display: {
 11 |                         properties: {
 12 |                             foo: {},
 13 |                             bar: {},
 14 |                             baz: {},
 15 |                         },
 16 |                     },
 17 |                     transformation: {
 18 |                         fields: ['baz', 'qux', 'extra'],
 19 |                     },
 20 |                 },
 21 |             },
 22 |         };
 23 |         const result = getActorDefinitionStorageFieldNames(storage);
 24 |         expect(result.sort()).toEqual(['bar', 'baz', 'extra', 'foo', 'qux']);
 25 |     });
 26 | 
 27 |     it('should return unique field names from multiple views (display.properties and transformation.fields)', () => {
 28 |         const storage = {
 29 |             views: {
 30 |                 view1: {
 31 |                     display: {
 32 |                         properties: {
 33 |                             foo: {},
 34 |                             bar: {},
 35 |                         },
 36 |                     },
 37 |                     transformation: {
 38 |                         fields: ['foo', 'alpha'],
 39 |                     },
 40 |                 },
 41 |                 view2: {
 42 |                     display: {
 43 |                         properties: {
 44 |                             bar: {},
 45 |                             baz: {},
 46 |                         },
 47 |                     },
 48 |                     transformation: {
 49 |                         fields: ['baz', 'beta', 'alpha'],
 50 |                     },
 51 |                 },
 52 |             },
 53 |         };
 54 |         const result = getActorDefinitionStorageFieldNames(storage);
 55 |         expect(result.sort()).toEqual(['alpha', 'bar', 'baz', 'beta', 'foo']);
 56 |     });
 57 | 
 58 |     it('should return an empty array if no properties or fields are present', () => {
 59 |         const storage = {
 60 |             views: {
 61 |                 view1: {
 62 |                     display: {
 63 |                         properties: {},
 64 |                     },
 65 |                     transformation: {
 66 |                         fields: [],
 67 |                     },
 68 |                 },
 69 |             },
 70 |         };
 71 |         const result = getActorDefinitionStorageFieldNames(storage);
 72 |         expect(result).toEqual([]);
 73 |     });
 74 | 
 75 |     it('should handle empty views object', () => {
 76 |         const storage = { views: {} };
 77 |         const result = getActorDefinitionStorageFieldNames(storage);
 78 |         expect(result).toEqual([]);
 79 |     });
 80 | 
 81 |     it('should handle missing transformation or display', () => {
 82 |         const storage = {
 83 |             views: {
 84 |                 view1: {
 85 |                     display: {
 86 |                         properties: { foo: {} },
 87 |                     },
 88 |                 },
 89 |                 view2: {
 90 |                     transformation: {
 91 |                         fields: ['bar', 'baz'],
 92 |                     },
 93 |                 },
 94 |                 view3: {},
 95 |             },
 96 |         };
 97 |         const result = getActorDefinitionStorageFieldNames(storage);
 98 |         expect(result.sort()).toEqual(['bar', 'baz', 'foo']);
 99 |     });
100 | });
101 | 
102 | describe('ensureOutputWithinCharLimit', () => {
103 |     it('should return all items when limit is high', () => {
104 |         const items = [
105 |             { id: 1, name: 'Item 1', value: 'test' },
106 |             { id: 2, name: 'Item 2', value: 'test' },
107 |         ];
108 |         const charLimit = JSON.stringify(items).length;
109 |         const result = ensureOutputWithinCharLimit(items, [], charLimit);
110 |         expect(result).toEqual(items);
111 |     });
112 | 
113 |     it('should use important fields when all items exceed limit', () => {
114 |         const items = [
115 |             { id: 1, name: 'Item 1', description: 'Very long description that makes this item exceed the limit', extra: 'unnecessary data' },
116 |             { id: 2, name: 'Item 2', description: 'Another long description', extra: 'more unnecessary data' },
117 |         ];
118 |         const importantFields = ['id', 'name'];
119 |         const charLimit = 100; // Very small limit
120 |         const result = ensureOutputWithinCharLimit(items, importantFields, charLimit);
121 |         expect(result).toEqual([
122 |             { id: 1, name: 'Item 1' },
123 |             { id: 2, name: 'Item 2' },
124 |         ]);
125 |     });
126 | 
127 |     it('should remove all items when limit is extremely small', () => {
128 |         const items = [
129 |             { id: 1, name: 'Item 1' },
130 |             { id: 2, name: 'Item 2' },
131 |         ];
132 |         const charLimit = 10; // Extremely small limit - even empty array JSON "[]" is 2 chars
133 |         const result = ensureOutputWithinCharLimit(items, [], charLimit);
134 |         expect(result).toEqual([]);
135 |         expect(JSON.stringify(result).length).toBeLessThanOrEqual(charLimit);
136 |     });
137 | });
138 | 


--------------------------------------------------------------------------------
/src/tools/run.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | 
  3 | import { ApifyClient } from '../apify-client.js';
  4 | import { HelperTools, TOOL_STATUS } from '../const.js';
  5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
  6 | import { compileSchema } from '../utils/ajv.js';
  7 | import { buildMCPResponse } from '../utils/mcp.js';
  8 | 
  9 | const getActorRunArgs = z.object({
 10 |     runId: z.string()
 11 |         .min(1)
 12 |         .describe('The ID of the Actor run.'),
 13 | });
 14 | 
 15 | const abortRunArgs = z.object({
 16 |     runId: z.string()
 17 |         .min(1)
 18 |         .describe('The ID of the Actor run to abort.'),
 19 |     gracefully: z.boolean().optional().describe('If true, the Actor run will abort gracefully with a 30-second timeout.'),
 20 | });
 21 | 
 22 | /**
 23 |  * https://docs.apify.com/api/v2/actor-run-get
 24 |  */
 25 | export const getActorRun: ToolEntry = {
 26 |     type: 'internal',
 27 |     name: HelperTools.ACTOR_RUNS_GET,
 28 |     description: `Get detailed information about a specific Actor run by runId.
 29 | The results will include run metadata (status, timestamps), performance stats, and resource IDs (datasetId, keyValueStoreId, requestQueueId).
 30 | 
 31 | USAGE:
 32 | - Use when you need to inspect run status or retrieve associated resource IDs (e.g., datasetId for output).
 33 | 
 34 | USAGE EXAMPLES:
 35 | - user_input: Show details of run y2h7sK3Wc
 36 | - user_input: What is the datasetId for run y2h7sK3Wc?`,
 37 |     inputSchema: z.toJSONSchema(getActorRunArgs) as ToolInputSchema,
 38 |     ajvValidate: compileSchema(z.toJSONSchema(getActorRunArgs)),
 39 |     annotations: {
 40 |         title: 'Get Actor run',
 41 |         readOnlyHint: true,
 42 |         openWorldHint: false,
 43 |     },
 44 |     call: async (toolArgs: InternalToolArgs) => {
 45 |         const { args, apifyToken } = toolArgs;
 46 |         const parsed = getActorRunArgs.parse(args);
 47 |         const client = new ApifyClient({ token: apifyToken });
 48 |         const v = await client.run(parsed.runId).get();
 49 |         if (!v) {
 50 |             return buildMCPResponse({ texts: [`Run with ID '${parsed.runId}' not found.`],
 51 |                 isError: true,
 52 |                 toolStatus: TOOL_STATUS.SOFT_FAIL });
 53 |         }
 54 |         const texts = [`\`\`\`json\n${JSON.stringify(v, null, 2)}\n\`\`\``];
 55 |         return buildMCPResponse({ texts });
 56 |     },
 57 | } as const;
 58 | 
 59 | const GetRunLogArgs = z.object({
 60 |     runId: z.string().min(1).describe('The ID of the Actor run.'), // reject empty IDs, like the other run tools in this file
 61 |     lines: z.number().min(1) // a zero or negative count makes the "last N lines" slice meaningless
 62 |         .max(50)
 63 |         .describe('Output the last NUM lines, instead of the last 10')
 64 |         .default(10),
 65 | });
 66 | 
 67 | /**
 68 |  * https://docs.apify.com/api/v2/actor-run-get
 69 |  *  /v2/actor-runs/{runId}/log{?token}
 70 |  */
 71 | export const getActorRunLog: ToolEntry = {
 72 |     type: 'internal',
 73 |     name: HelperTools.ACTOR_RUNS_LOG,
 74 |     description: `Retrieve recent log lines for a specific Actor run.
 75 | The results will include the last N lines of the run's log output (plain text).
 76 | 
 77 | USAGE:
 78 | - Use when you need to inspect recent logs to debug or monitor a run.
 79 | 
 80 | USAGE EXAMPLES:
 81 | - user_input: Show last 20 lines of logs for run y2h7sK3Wc
 82 | - user_input: Get logs for run y2h7sK3Wc`,
 83 |     inputSchema: z.toJSONSchema(GetRunLogArgs) as ToolInputSchema,
 84 |     // It does not make sense to add structured output here since the log API just returns plain text
 85 |     ajvValidate: compileSchema(z.toJSONSchema(GetRunLogArgs)),
 86 |     annotations: {
 87 |         title: 'Get Actor run log',
 88 |         readOnlyHint: true,
 89 |         openWorldHint: false,
 90 |     },
 91 |     call: async (toolArgs: InternalToolArgs) => {
 92 |         const { args, apifyToken } = toolArgs;
 93 |         const parsed = GetRunLogArgs.parse(args);
 94 |         const client = new ApifyClient({ token: apifyToken });
 95 |         const v = await client.run(parsed.runId).log().get() ?? ''; // a missing log is treated as empty text
 96 |         const lines = v.replace(/\n$/, '').split('\n'); // drop one trailing newline so it is not counted as an extra (empty) line
 97 |         const text = parsed.lines > 0 ? lines.slice(-parsed.lines).join('\n') : ''; // guard: slice(-0) would return the whole log
 98 |         return { content: [{ type: 'text', text }] };
 99 |     },
100 | } as const;
101 | 
102 | /**
103 |  * https://docs.apify.com/api/v2/actor-run-abort-post
104 |  */
105 | export const abortActorRun: ToolEntry = {
106 |     type: 'internal',
107 |     name: HelperTools.ACTOR_RUNS_ABORT,
108 |     description: `Abort an Actor run that is currently starting or running.
109 | For runs with status FINISHED, FAILED, ABORTING, or TIMED-OUT, this call has no effect.
110 | The results will include the updated run details after the abort request.
111 | 
112 | USAGE:
113 | - Use when you need to stop a run that is taking too long or misconfigured.
114 | 
115 | USAGE EXAMPLES:
116 | - user_input: Abort run y2h7sK3Wc
117 | - user_input: Gracefully abort run y2h7sK3Wc`,
118 |     inputSchema: z.toJSONSchema(abortRunArgs) as ToolInputSchema,
119 |     ajvValidate: compileSchema(z.toJSONSchema(abortRunArgs)),
120 |     annotations: {
121 |         title: 'Abort Actor run',
122 |         openWorldHint: false,
123 |     },
124 |     call: async (toolArgs: InternalToolArgs) => {
125 |         const { args, apifyToken } = toolArgs;
126 |         const parsed = abortRunArgs.parse(args);
127 |         const client = new ApifyClient({ token: apifyToken });
128 |         const v = await client.run(parsed.runId).abort({ gracefully: parsed.gracefully });
129 |         return { content: [{ type: 'text', text: `\`\`\`json\n${JSON.stringify(v)}\n\`\`\`` }] };
130 |     },
131 | } as const;
132 | 


--------------------------------------------------------------------------------
/src/tools/fetch-apify-docs.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | 
  3 | import log from '@apify/log';
  4 | 
  5 | import { ALLOWED_DOC_DOMAINS, HelperTools, TOOL_STATUS } from '../const.js';
  6 | import { fetchApifyDocsCache } from '../state.js';
  7 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
  8 | import { compileSchema } from '../utils/ajv.js';
  9 | import { htmlToMarkdown } from '../utils/html-to-md.js';
 10 | import { logHttpError } from '../utils/logging.js';
 11 | import { buildMCPResponse } from '../utils/mcp.js';
 12 | import { fetchApifyDocsToolOutputSchema } from './structured-output-schemas.js';
 13 | 
 14 | const fetchApifyDocsToolArgsSchema = z.object({
 15 |     url: z.string()
 16 |         .min(1)
 17 |         .describe(`URL of the Apify documentation page to fetch. This should be the full URL, including the protocol (e.g., https://docs.apify.com/).`),
 18 | });
 19 | 
 20 | export const fetchApifyDocsTool: ToolEntry = {
 21 |     type: 'internal',
 22 |     name: HelperTools.DOCS_FETCH,
 23 |     description: `Fetch the full content of an Apify or Crawlee documentation page by its URL.
 24 | Use this after finding a relevant page with the ${HelperTools.DOCS_SEARCH} tool.
 25 | 
 26 | USAGE:
 27 | - Use when you need the complete content of a specific docs page for detailed answers.
 28 | 
 29 | USAGE EXAMPLES:
 30 | - user_input: Fetch https://docs.apify.com/platform/actors/running#builds
 31 | - user_input: Fetch https://docs.apify.com/academy
 32 | - user_input: Fetch https://crawlee.dev/docs/guides/basic-concepts`,
 33 |     inputSchema: z.toJSONSchema(fetchApifyDocsToolArgsSchema) as ToolInputSchema,
 34 |     outputSchema: fetchApifyDocsToolOutputSchema,
 35 |     ajvValidate: compileSchema(z.toJSONSchema(fetchApifyDocsToolArgsSchema)),
 36 |     annotations: {
 37 |         title: 'Fetch Apify docs',
 38 |         readOnlyHint: true,
 39 |         openWorldHint: false,
 40 |     },
 41 |     call: async (toolArgs: InternalToolArgs) => {
 42 |         const { args } = toolArgs;
 43 | 
 44 |         const parsed = fetchApifyDocsToolArgsSchema.parse(args);
 45 |         const url = parsed.url.trim();
 46 |         const urlWithoutFragment = url.split('#')[0];
 47 | 
 48 |         // Allow only Apify and Crawlee docs. The prefix must be followed by '/', '?', '#' or the end of the URL; a bare prefix match would also accept hosts such as "docs.apify.com.evil.example".
 49 |         const isAllowedDomain = ALLOWED_DOC_DOMAINS.some((domain) => url.startsWith(domain) && (domain.endsWith('/') || '/?#'.includes(url.charAt(domain.length)))); // charAt past the end yields '', which includes() accepts
 50 | 
 51 |         if (!isAllowedDomain) {
 52 |             log.softFail(`[fetch-apify-docs] Invalid URL domain: ${url}`);
 53 |             return buildMCPResponse({
 54 |                 texts: [`Invalid URL: "${url}".
 55 | Only documentation URLs from Apify and Crawlee are allowed (starting with ${ALLOWED_DOC_DOMAINS.map((d) => `"${d}"`).join(' or ')}).
 56 | Please provide a valid documentation URL. You can find documentation URLs using the ${HelperTools.DOCS_SEARCH} tool.`],
 57 |                 isError: true,
 58 |                 toolStatus: TOOL_STATUS.SOFT_FAIL,
 59 |             });
 60 |         }
 61 | 
 62 |         // Cache URL without fragment to avoid fetching the same page multiple times
 63 |         let markdown = fetchApifyDocsCache.get(urlWithoutFragment);
 64 |         // If the content is not cached, fetch it from the URL
 65 |         if (!markdown) {
 66 |             try {
 67 |                 const response = await fetch(url);
 68 |                 if (!response.ok) {
 69 |                     const error = Object.assign(new Error(`HTTP ${response.status} ${response.statusText}`), {
 70 |                         statusCode: response.status,
 71 |                     });
 72 |                     logHttpError(error, 'Failed to fetch the documentation page', { url, statusText: response.statusText });
 73 |                     // HTTP 4xx = user error (soft_fail); any other non-OK status (e.g. 5xx) is reported as failed right here, not via the catch block
 74 |                     const isUserError = response.status >= 400 && response.status < 500;
 75 |                     return buildMCPResponse({
 76 |                         texts: [`Failed to fetch the documentation page at "${url}".
 77 | HTTP Status: ${response.status} ${response.statusText}.
 78 | Please verify the URL is correct and accessible. You can search for available documentation pages using the ${HelperTools.DOCS_SEARCH} tool.`],
 79 |                         isError: true,
 80 |                         toolStatus: isUserError ? TOOL_STATUS.SOFT_FAIL : TOOL_STATUS.FAILED,
 81 |                     });
 82 |                 }
 83 |                 const html = await response.text();
 84 |                 markdown = htmlToMarkdown(html);
 85 |                 // Cache the processed Markdown content
 86 |                 // Use the URL without fragment as the key to avoid caching same page with different fragments
 87 |                 fetchApifyDocsCache.set(urlWithoutFragment, markdown);
 88 |             } catch (error) {
 89 |                 logHttpError(error, 'Failed to fetch the documentation page', { url });
 90 |                 // Network/fetch errors are typically user errors (bad URL, connectivity issues)
 91 |                 return buildMCPResponse({
 92 |                     texts: [`Failed to fetch the documentation page at "${url}".
 93 | Error: ${error instanceof Error ? error.message : String(error)}.
 94 | Please verify the URL is correct and accessible. You can search for available documentation pages using the ${HelperTools.DOCS_SEARCH} tool.`],
 95 |                     isError: true,
 96 |                     toolStatus: TOOL_STATUS.SOFT_FAIL,
 97 |                 });
 98 |             }
 99 |         }
100 | 
101 |         return buildMCPResponse({ texts: [`Fetched content from ${url}:\n\n${markdown}`], structuredContent: { url, content: markdown } });
102 |     },
103 | } as const;
104 | 


--------------------------------------------------------------------------------
/src/tools/key_value_store.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | 
  3 | import { ApifyClient } from '../apify-client.js';
  4 | import { HelperTools } from '../const.js';
  5 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
  6 | import { compileSchema } from '../utils/ajv.js';
  7 | 
  8 | const getKeyValueStoreArgs = z.object({
  9 |     storeId: z.string()
 10 |         .min(1)
 11 |         .describe('Key-value store ID or username~store-name'),
 12 | });
 13 | 
 14 | /**
 15 |  * https://docs.apify.com/api/v2/key-value-store-get
 16 |  */
 17 | export const getKeyValueStore: ToolEntry = {
 18 |     type: 'internal',
 19 |     name: HelperTools.KEY_VALUE_STORE_GET,
 20 |     description: `Get details about a key-value store by ID or username~store-name.
 21 | The results will include store metadata (ID, name, owner, access settings) and usage statistics.
 22 | 
 23 | USAGE:
 24 | - Use when you need to inspect a store to locate records or understand its properties.
 25 | 
 26 | USAGE EXAMPLES:
 27 | - user_input: Show info for key-value store username~my-store
 28 | - user_input: Get details for store adb123`,
 29 |     inputSchema: z.toJSONSchema(getKeyValueStoreArgs) as ToolInputSchema,
 30 |     ajvValidate: compileSchema(z.toJSONSchema(getKeyValueStoreArgs)),
 31 |     annotations: {
 32 |         title: 'Get key-value store',
 33 |         readOnlyHint: true,
 34 |         openWorldHint: false,
 35 |     },
 36 |     call: async (toolArgs: InternalToolArgs) => {
 37 |         const { args, apifyToken } = toolArgs;
 38 |         const parsed = getKeyValueStoreArgs.parse(args);
 39 |         const client = new ApifyClient({ token: apifyToken });
 40 |         const store = await client.keyValueStore(parsed.storeId).get();
 41 |         return { content: [{ type: 'text', text: `\`\`\`json\n${JSON.stringify(store)}\n\`\`\`` }] };
 42 |     },
 43 | } as const;
 44 | 
 45 | const getKeyValueStoreKeysArgs = z.object({
 46 |     storeId: z.string()
 47 |         .min(1)
 48 |         .describe('Key-value store ID or username~store-name'),
 49 |     exclusiveStartKey: z.string()
 50 |         .optional()
 51 |         .describe('All keys up to this one (including) are skipped from the result.'),
 52 |     limit: z.number()
 53 |         .max(10) // enforced cap; the description below must state the same number or callers will send values that fail validation
 54 |         .optional()
 55 |         .describe('Number of keys to be returned. Maximum value is 10.'),
 56 | });
 57 | 
 58 | /**
 59 |  * https://docs.apify.com/api/v2/key-value-store-keys-get
 60 |  */
 61 | export const getKeyValueStoreKeys: ToolEntry = {
 62 |     type: 'internal',
 63 |     name: HelperTools.KEY_VALUE_STORE_KEYS_GET,
 64 |     description: `List keys in a key-value store with optional pagination.
 65 | The results will include keys and basic info about stored values (e.g., size).
 66 | Use exclusiveStartKey and limit to paginate.
 67 | 
 68 | USAGE:
 69 | - Use when you need to discover what records exist in a store.
 70 | 
 71 | USAGE EXAMPLES:
 72 | - user_input: List first 10 keys in store username~my-store
 73 | - user_input: Continue listing keys in store a123 from key data.json`,
 74 |     inputSchema: z.toJSONSchema(getKeyValueStoreKeysArgs) as ToolInputSchema,
 75 |     ajvValidate: compileSchema(z.toJSONSchema(getKeyValueStoreKeysArgs)),
 76 |     annotations: {
 77 |         title: 'Get key-value store keys',
 78 |         readOnlyHint: true,
 79 |         openWorldHint: false,
 80 |     },
 81 |     call: async (toolArgs: InternalToolArgs) => {
 82 |         const { args, apifyToken } = toolArgs;
 83 |         const parsed = getKeyValueStoreKeysArgs.parse(args); // the schema caps limit at 10, so the usage example above must stay within that
 84 |         const client = new ApifyClient({ token: apifyToken });
 85 |         const keys = await client.keyValueStore(parsed.storeId).listKeys({
 86 |             exclusiveStartKey: parsed.exclusiveStartKey,
 87 |             limit: parsed.limit,
 88 |         });
 89 |         return { content: [{ type: 'text', text: `\`\`\`json\n${JSON.stringify(keys)}\n\`\`\`` }] };
 90 |     },
 91 | } as const;
 92 | 
 93 | const getKeyValueStoreRecordArgs = z.object({
 94 |     storeId: z.string()
 95 |         .min(1)
 96 |         .describe('Key-value store ID or username~store-name'),
 97 |     recordKey: z.string()
 98 |         .min(1)
 99 |         .describe('Key of the record to retrieve.'),
100 | });
101 | 
102 | /**
103 |  * https://docs.apify.com/api/v2/key-value-store-record-get
104 |  */
105 | export const getKeyValueStoreRecord: ToolEntry = {
106 |     type: 'internal',
107 |     name: HelperTools.KEY_VALUE_STORE_RECORD_GET,
108 |     description: `Get a value stored in a key-value store under a specific key.
109 | The response preserves the original Content-Encoding; most clients handle decompression automatically.
110 | 
111 | USAGE:
112 | - Use when you need to retrieve a specific record (JSON, text, or binary) from a store.
113 | 
114 | USAGE EXAMPLES:
115 | - user_input: Get record INPUT from store abc123
116 | - user_input: Get record data.json from store username~my-store`,
117 |     inputSchema: z.toJSONSchema(getKeyValueStoreRecordArgs) as ToolInputSchema,
118 |     ajvValidate: compileSchema(z.toJSONSchema(getKeyValueStoreRecordArgs)),
119 |     annotations: {
120 |         title: 'Get key-value store record',
121 |         readOnlyHint: true,
122 |         openWorldHint: false,
123 |     },
124 |     call: async (toolArgs: InternalToolArgs) => {
125 |         const { args, apifyToken } = toolArgs;
126 |         const parsed = getKeyValueStoreRecordArgs.parse(args);
127 |         const client = new ApifyClient({ token: apifyToken });
128 |         const record = await client.keyValueStore(parsed.storeId).getRecord(parsed.recordKey);
129 |         return { content: [{ type: 'text', text: `\`\`\`json\n${JSON.stringify(record)}\n\`\`\`` }] };
130 |     },
131 | } as const;
132 | 


--------------------------------------------------------------------------------
/src/tools/search-apify-docs.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | 
  3 | import { DOCS_SOURCES, HelperTools } from '../const.js';
  4 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
  5 | import { compileSchema } from '../utils/ajv.js';
  6 | import { searchDocsBySourceCached } from '../utils/apify-docs.js';
  7 | import { buildMCPResponse } from '../utils/mcp.js';
  8 | import { searchApifyDocsToolOutputSchema } from './structured-output-schemas.js';
  9 | 
 10 | /**
 11 |  * Build docSource parameter description dynamically from DOCS_SOURCES
 12 |  */
 13 | function buildDocSourceDescription(): string {
 14 |     const options = DOCS_SOURCES.map(
 15 |         (idx) => `• "${idx.id}" - ${idx.label}`,
 16 |     ).join('\n');
 17 |     return `Documentation source to search. Defaults to "apify".\n${options}`;
 18 | }
 19 | 
 20 | /**
 21 |  * Build tool description dynamically from DOCS_SOURCES
 22 |  */
 23 | function buildToolDescription(): string {
 24 |     const sources = DOCS_SOURCES.map(
 25 |         (idx) => `• docSource="${idx.id}" - ${idx.label}:\n  ${idx.description}`,
 26 |     ).join('\n\n');
 27 | 
 28 |     return `Search Apify and Crawlee documentation using full-text search.
 29 | 
 30 | You must explicitly select which documentation source to search using the docSource parameter:
 31 | 
 32 | ${sources}
 33 | 
 34 | The results will include the URL of the documentation page (which may include an anchor),
 35 | and a limited piece of content that matches the search query.
 36 | 
 37 | Fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL.`;
 38 | }
 39 | 
 40 | const searchApifyDocsToolArgsSchema = z.object({
 41 |     docSource: z.enum(
 42 |         DOCS_SOURCES.map((source) => source.id) as [string, ...string[]],
 43 |     )
 44 |         .optional()
 45 |         .default('apify')
 46 |         .describe(buildDocSourceDescription()),
 47 |     query: z.string()
 48 |         .min(1)
 49 |         .describe(
 50 |             `Algolia full-text search query to find relevant documentation pages.
 51 | Use only keywords, do not use full sentences or questions.
 52 | For example, "standby actor" will return documentation pages that contain the words "standby" and "actor".`,
 53 |         ),
 54 |     limit: z.number()
 55 |         .min(1)
 56 |         .max(20) // Algolia does not return more than 20 results anyway
 57 |         .optional()
 58 |         .default(5)
 59 |         .describe(`Maximum number of search results to return. Defaults to 5. Maximum is 20.
 60 | You can increase this limit if you need more results, but keep in mind that the search results are limited to the most relevant pages.`),
 61 |     offset: z.number().min(0) // a negative offset would make slice(offset, offset + limit) silently return no results
 62 |         .optional()
 63 |         .default(0)
 64 |         .describe(`Offset for the search results. Defaults to 0.
 65 | Use this to paginate through the search results. For example, if you want to get the next 5 results, set the offset to 5 and limit to 5.`),
 66 | });
 67 | 
 68 | export const searchApifyDocsTool: ToolEntry = {
 69 |     type: 'internal',
 70 |     name: HelperTools.DOCS_SEARCH,
 71 |     description: buildToolDescription(),
 72 |     inputSchema: z.toJSONSchema(searchApifyDocsToolArgsSchema) as ToolInputSchema,
 73 |     outputSchema: searchApifyDocsToolOutputSchema,
 74 |     ajvValidate: compileSchema(z.toJSONSchema(searchApifyDocsToolArgsSchema)),
 75 |     annotations: {
 76 |         title: 'Search Apify docs',
 77 |         readOnlyHint: true,
 78 |         openWorldHint: false,
 79 |     },
 80 |     call: async (toolArgs: InternalToolArgs) => {
 81 |         const { args } = toolArgs;
 82 | 
 83 |         const parsed = searchApifyDocsToolArgsSchema.parse(args);
 84 | 
 85 |         const query = parsed.query.trim();
 86 |         const resultsRaw = await searchDocsBySourceCached(parsed.docSource, query);
 87 | 
 88 |         const results = resultsRaw.slice(parsed.offset, parsed.offset + parsed.limit);
 89 | 
 90 |         if (results.length === 0) {
 91 |             const instructions = `No results found for the query "${query}" in the "${parsed.docSource}" documentation source.
 92 | Please try a different query with different keywords, or adjust the limit and offset parameters.
 93 | You can also try using more specific or alternative keywords related to your search topic.`;
 94 |             const structuredContent = {
 95 |                 results: [],
 96 |                 query,
 97 |                 count: 0,
 98 |                 instructions,
 99 |             };
100 |             return buildMCPResponse({ texts: [instructions], structuredContent });
101 |         }
102 | 
103 |         // Instructions for LLM to use the docs fetch tool when retrieving full document content
104 |         const instructions = 'You can use the Apify docs fetch tool to retrieve the full content of a document by its URL.';
105 |         // Actual unstructured text result
106 |         const textResult = `Search results for "${query}" in ${parsed.docSource}:
107 | 
108 | ${results.map((result) => {
109 |             let line = `- Document URL: ${result.url}`;
110 |             if (result.content) {
111 |                 line += `\n  Content: ${result.content}`;
112 |             }
113 |             return line;
114 |         }).join('\n\n')}`;
115 | 
116 |         const structuredContent = {
117 |             results: results.map((result) => ({
118 |                 url: result.url,
119 |                 ...(result.content ? { content: result.content } : {}),
120 |             })),
121 |             query,
122 |             count: results.length,
123 |             instructions,
124 |         };
125 |         // We put the instructions at the end so that they are more likely to be acknowledged by the LLM
126 |         return buildMCPResponse({ texts: [textResult, instructions], structuredContent });
127 |     },
128 | } as const;
129 | 


--------------------------------------------------------------------------------
/src/tools/get-html-skeleton.ts:
--------------------------------------------------------------------------------
  1 | import { z } from 'zod';
  2 | 
  3 | import { ApifyClient } from '../apify-client.js';
  4 | import { HelperTools, RAG_WEB_BROWSER, TOOL_MAX_OUTPUT_CHARS, TOOL_STATUS } from '../const.js';
  5 | import { getHtmlSkeletonCache } from '../state.js';
  6 | import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../types.js';
  7 | import { compileSchema } from '../utils/ajv.js';
  8 | import { isValidHttpUrl } from '../utils/generic.js';
  9 | import { stripHtml } from '../utils/html.js';
 10 | import { buildMCPResponse } from '../utils/mcp.js';
 11 | 
 12 | type ScrapedPageItem = {
 13 |     crawl: {
 14 |         httpStatusCode: number;
 15 |         httpStatusMessage: string;
 16 |     };
 17 |     metadata: {
 18 |         url: string;
 19 |     };
 20 |     query: string;
 21 |     html?: string;
 22 | }
 23 | 
 24 | const getHtmlSkeletonArgs = z.object({
 25 |     url: z.string()
 26 |         .min(1)
 27 |         .describe('URL of the webpage to retrieve HTML skeleton from.'),
 28 |     enableJavascript: z.boolean()
 29 |         .optional()
 30 |         .default(false)
 31 |         .describe('Whether to enable JavaScript rendering. Enabling this may increase the time taken to retrieve the HTML skeleton.'),
 32 |     chunk: z.number()
 33 |         .optional()
 34 |         .default(1)
 35 |         .describe('Chunk number to retrieve when getting the content. The content is split into chunks to prevent exceeding the maximum tool output length.'),
 36 | });
 37 | 
 38 | export const getHtmlSkeleton: ToolEntry = {
 39 |     type: 'internal',
 40 |     name: HelperTools.GET_HTML_SKELETON,
 41 |     description: `Retrieve the HTML skeleton (clean structure) of a webpage by stripping scripts, styles, and non-essential attributes.
 42 | This keeps the core HTML structure, links, images, and data attributes for analysis. Supports optional JavaScript rendering for dynamic pages.
 43 | 
 44 | The results will include a chunked HTML skeleton if the content is large. Use the chunk parameter to paginate through the output.
 45 | 
 46 | USAGE:
 47 | - Use when you need a clean HTML structure to design selectors or parsers for scraping.
 48 | 
 49 | USAGE EXAMPLES:
 50 | - user_input: Get HTML skeleton for https://example.com
 51 | - user_input: Get next chunk of HTML skeleton for https://example.com (chunk=2)`,
 52 |     inputSchema: z.toJSONSchema(getHtmlSkeletonArgs) as ToolInputSchema,
 53 |     ajvValidate: compileSchema(z.toJSONSchema(getHtmlSkeletonArgs)),
 54 |     annotations: {
 55 |         title: 'Get HTML skeleton',
 56 |         readOnlyHint: true,
 57 |         openWorldHint: true,
 58 |     },
 59 |     call: async (toolArgs: InternalToolArgs) => {
 60 |         const { args, apifyToken } = toolArgs;
 61 |         const parsed = getHtmlSkeletonArgs.parse(args);
 62 | 
 63 |         if (!isValidHttpUrl(parsed.url)) {
 64 |             return buildMCPResponse({
 65 |                 texts: [`The provided URL is not a valid HTTP or HTTPS URL: ${parsed.url}`],
 66 |                 isError: true,
 67 |                 toolStatus: TOOL_STATUS.SOFT_FAIL,
 68 |             });
 69 |         }
 70 | 
 71 |         // Try to get from cache first
 72 |         let strippedHtml = getHtmlSkeletonCache.get(parsed.url);
 73 |         if (!strippedHtml) {
 74 |             // Not in cache, call the Actor for scraping
 75 |             const client = new ApifyClient({ token: apifyToken });
 76 | 
 77 |             const run = await client.actor(RAG_WEB_BROWSER).call({
 78 |                 query: parsed.url,
 79 |                 outputFormats: [
 80 |                     'html',
 81 |                 ],
 82 |                 scrapingTool: parsed.enableJavascript ? 'browser-playwright' : 'raw-http',
 83 |             });
 84 | 
 85 |             const datasetItems = await client.dataset(run.defaultDatasetId).listItems();
 86 |             if (datasetItems.items.length === 0) {
 87 |                 return buildMCPResponse({
 88 |                     texts: [`The scraping Actor (${RAG_WEB_BROWSER}) did not return any output for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
 89 |                     isError: true,
 90 |                 });
 91 |             }
 92 | 
 93 |             const firstItem = datasetItems.items[0] as unknown as ScrapedPageItem;
 94 |             if (firstItem.crawl.httpStatusMessage.toLocaleLowerCase() !== 'ok') {
 95 |                 return buildMCPResponse({
 96 |                     texts: [`The scraping Actor (${RAG_WEB_BROWSER}) returned an HTTP status ${firstItem.crawl.httpStatusCode} (${firstItem.crawl.httpStatusMessage}) for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
 97 |                     isError: true,
 98 |                 });
 99 |             }
100 | 
101 |             if (!firstItem.html) {
102 |                 return buildMCPResponse({
103 |                     texts: [`The scraping Actor (${RAG_WEB_BROWSER}) did not return any HTML content for the URL: ${parsed.url}. Please check the Actor run for more details: ${run.id}`],
104 |                     isError: true,
105 |                 });
106 |             }
107 | 
108 |             strippedHtml = stripHtml(firstItem.html);
109 |             getHtmlSkeletonCache.set(parsed.url, strippedHtml);
110 |         }
111 | 
112 |         // Pagination logic
113 |         const totalLength = strippedHtml.length;
114 |         const chunkSize = TOOL_MAX_OUTPUT_CHARS;
115 |         const totalChunks = Math.ceil(totalLength / chunkSize);
116 |         const startIndex = (parsed.chunk - 1) * chunkSize;
117 |         const endIndex = Math.min(startIndex + chunkSize, totalLength);
118 |         const chunkContent = strippedHtml.slice(startIndex, endIndex);
119 |         const hasNextChunk = parsed.chunk < totalChunks;
120 | 
121 |         const chunkInfo = `\n\n--- Chunk ${parsed.chunk} of ${totalChunks} ---\n${hasNextChunk ? `Next chunk: ${parsed.chunk + 1}` : 'End of content'}`;
122 | 
123 |         return buildMCPResponse({ texts: [chunkContent + chunkInfo] });
124 |     },
125 | } as const;
126 | 


--------------------------------------------------------------------------------
/tests/integration/internals.test.ts:
--------------------------------------------------------------------------------
  1 | import { InMemoryTaskStore } from '@modelcontextprotocol/sdk/experimental/tasks/stores/in-memory.js';
  2 | import { beforeAll, describe, expect, it } from 'vitest';
  3 | 
  4 | import log from '@apify/log';
  5 | 
  6 | import { ApifyClient } from '../../src/apify-client.js';
  7 | import { ActorsMcpServer } from '../../src/index.js';
  8 | import { addTool } from '../../src/tools/helpers.js';
  9 | import { getActorsAsTools } from '../../src/tools/index.js';
 10 | import { actorNameToToolName } from '../../src/tools/utils.js';
 11 | import type { Input } from '../../src/types.js';
 12 | import { loadToolsFromInput } from '../../src/utils/tools-loader.js';
 13 | import { ACTOR_PYTHON_EXAMPLE } from '../const.js';
 14 | import { expectArrayWeakEquals } from '../helpers.js';
 15 | 
 16 | beforeAll(() => {
 17 |     log.setLevel(log.LEVELS.OFF);
 18 | });
 19 | 
 20 | describe('MCP server internals integration tests', () => {
 21 |     it('should load and restore tools from a tool list', async () => {
 22 |         const actorsMcpServer = new ActorsMcpServer({ setupSigintHandler: false, taskStore: new InMemoryTaskStore() });
 23 |         const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN });
 24 |         const initialTools = await loadToolsFromInput({
 25 |             enableAddingActors: true,
 26 |         } as Input, apifyClient);
 27 |         actorsMcpServer.upsertTools(initialTools);
 28 | 
 29 |         // Load new tool
 30 |         const newTool = await getActorsAsTools([ACTOR_PYTHON_EXAMPLE], apifyClient);
 31 |         actorsMcpServer.upsertTools(newTool);
 32 | 
 33 |         // Store the tool name list
 34 |         const names = actorsMcpServer.listAllToolNames();
 35 |         // With enableAddingActors=true and no tools/actors, we should only have add-actor initially
 36 |         const expectedToolNames = [
 37 |             addTool.name,
 38 |             ACTOR_PYTHON_EXAMPLE,
 39 |             'get-actor-output',
 40 |         ];
 41 |         expectArrayWeakEquals(expectedToolNames, names);
 42 | 
 43 |         // Remove all tools
 44 |         actorsMcpServer.tools.clear();
 45 |         expect(actorsMcpServer.listAllToolNames()).toEqual([]);
 46 | 
 47 |         // Load the tool state from the tool name list
 48 |         await actorsMcpServer.loadToolsByName(names, apifyClient);
 49 | 
 50 |         // Check if the tool name list is restored
 51 |         expectArrayWeakEquals(actorsMcpServer.listAllToolNames(), expectedToolNames);
 52 |     });
 53 | 
 54 |     it('should notify tools changed handler on tool modifications', async () => {
 55 |         let latestTools: string[] = [];
 56 |         // With enableAddingActors=true and no tools/actors, seeded set contains only add-actor
 57 |         const numberOfTools = 2;
 58 | 
 59 |         let toolNotificationCount = 0;
 60 |         const onToolsChanged = (tools: string[]) => {
 61 |             latestTools = tools;
 62 |             toolNotificationCount++;
 63 |         };
 64 | 
 65 |         const actorsMCPServer = new ActorsMcpServer({ setupSigintHandler: false, taskStore: new InMemoryTaskStore() });
 66 |         const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN });
 67 |         const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, apifyClient);
 68 |         actorsMCPServer.upsertTools(seeded);
 69 |         actorsMCPServer.registerToolsChangedHandler(onToolsChanged);
 70 | 
 71 |         // Add a new Actor
 72 |         const actor = ACTOR_PYTHON_EXAMPLE;
 73 |         const newTool = await getActorsAsTools([actor], apifyClient);
 74 |         actorsMCPServer.upsertTools(newTool, true);
 75 | 
 76 |         // Check if the notification was received with the correct tools
 77 |         expect(toolNotificationCount).toBe(1);
 78 |         expect(latestTools.length).toBe(numberOfTools + 1);
 79 |         expect(latestTools).toContain(actor);
 80 |         expect(latestTools).toContain(addTool.name);
 81 |         // No default actors are present when only add-actor is enabled by default
 82 | 
 83 |         // Remove the Actor
 84 |         actorsMCPServer.removeToolsByName([actorNameToToolName(actor)], true);
 85 | 
 86 |         // Check if the notification was received with the correct tools
 87 |         expect(toolNotificationCount).toBe(2);
 88 |         expect(latestTools.length).toBe(numberOfTools);
 89 |         expect(latestTools).not.toContain(actor);
 90 |         expect(latestTools).toContain(addTool.name);
 91 |         // No default actors are present by default in this mode
 92 |     });
 93 | 
 94 |     it('should stop notifying after unregistering tools changed handler', async () => {
 95 |         let latestTools: string[] = [];
 96 |         let notificationCount = 0;
 97 |         const numberOfTools = 2;
 98 |         const onToolsChanged = (tools: string[]) => {
 99 |             latestTools = tools;
100 |             notificationCount++;
101 |         };
102 | 
103 |         const actorsMCPServer = new ActorsMcpServer({ setupSigintHandler: false, taskStore: new InMemoryTaskStore() });
104 |         const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN });
105 |         const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, apifyClient);
106 |         actorsMCPServer.upsertTools(seeded);
107 |         actorsMCPServer.registerToolsChangedHandler(onToolsChanged);
108 | 
109 |         // Add a new Actor
110 |         const actor = ACTOR_PYTHON_EXAMPLE;
111 |         const newTool = await getActorsAsTools([actor], apifyClient);
112 |         actorsMCPServer.upsertTools(newTool, true);
113 | 
114 |         // Check if the notification was received
115 |         expect(notificationCount).toBe(1);
116 |         expect(latestTools.length).toBe(numberOfTools + 1);
117 |         expect(latestTools).toContain(actor);
118 | 
119 |         actorsMCPServer.unregisterToolsChangedHandler();
120 | 
121 |         // Remove the Actor
122 |         actorsMCPServer.removeToolsByName([actorNameToToolName(actor)], true);
123 | 
124 |         // Check if the notification was NOT received
125 |         expect(notificationCount).toBe(1);
126 |     });
127 | });
128 | 


--------------------------------------------------------------------------------
/evals/create-dataset.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env tsx
  2 | /**
  3 |  * One-time script to create Phoenix dataset from test cases.
  4 |  * Run this once to upload test cases to Phoenix platform and receive a dataset ID.
  5 |  */
  6 | 
  7 | import { createClient } from '@arizeai/phoenix-client';
  8 | // eslint-disable-next-line import/extensions
  9 | import { createDataset } from '@arizeai/phoenix-client/datasets';
 10 | import dotenv from 'dotenv';
 11 | import yargs from 'yargs';
 12 | // eslint-disable-next-line import/extensions
 13 | import { hideBin } from 'yargs/helpers';
 14 | 
 15 | import log from '@apify/log';
 16 | 
 17 | import { sanitizeHeaderValue, validateEnvVars } from './config.js';
 18 | import { loadTestCases, filterByCategory, filterById, type TestCase } from './evaluation-utils.js';
 19 | 
  20 | // Set log level to info
 21 | log.setLevel(log.LEVELS.INFO);
 22 | 
 23 | /**
 24 |  * Type for command line arguments
 25 |  */
 26 | type CliArgs = {
 27 |     testCases?: string;
 28 |     category?: string;
 29 |     id?: string;
 30 |     datasetName?: string;
 31 | };
 32 | 
 33 | // Load environment variables from .env file if present
 34 | dotenv.config({ path: '.env' });
 35 | 
 36 | // Parse command line arguments using yargs
 37 | const argv = yargs(hideBin(process.argv))
 38 |     .wrap(null) // Disable automatic wrapping to avoid issues with long lines
 39 |     .usage('Usage: $0 [options]')
 40 |     .env()
 41 |     .option('test-cases', {
 42 |         type: 'string',
 43 |         describe: 'Path to test cases JSON file',
 44 |         default: 'test-cases.json',
 45 |         example: 'custom-test-cases.json',
 46 |     })
 47 |     .option('category', {
 48 |         type: 'string',
 49 |         describe: 'Filter test cases by category. Supports wildcards with * (e.g., search-actors, search-actors-*)',
 50 |         example: 'search-actors',
 51 |     })
 52 |     .option('id', {
 53 |         type: 'string',
 54 |         describe: 'Filter test cases by ID using regex pattern',
 55 |         example: 'instagram.*',
 56 |     })
 57 |     .option('dataset-name', {
 58 |         type: 'string',
 59 |         describe: 'Custom dataset name (overrides auto-generated name)',
 60 |         example: 'my_custom_dataset',
 61 |     })
 62 |     .help('help')
 63 |     .alias('h', 'help')
 64 |     .version(false)
 65 |     .epilogue('Examples:')
 66 |     .epilogue('  $0                                    # Use defaults')
 67 |     .epilogue('  $0 --test-cases custom.json          # Use custom test cases file')
 68 |     .epilogue('  $0 --category search-actors          # Filter by exact category')
 69 |     .epilogue('  $0 --category search-actors-*        # Filter by wildcard pattern')
 70 |     .epilogue('  $0 --id instagram.*                  # Filter by ID regex pattern')
 71 |     .epilogue('  $0 --dataset-name my_dataset         # Custom dataset name')
 72 |     .epilogue('  $0 --test-cases custom.json --category search-actors')
 73 |     .parseSync() as CliArgs;
 74 | 
 75 | 
 76 | async function createDatasetFromTestCases(
 77 |     testCases: TestCase[],
 78 |     datasetName: string,
 79 |     version: string,
 80 | ): Promise<void> {
 81 |     log.info('Creating Phoenix dataset from test cases...');
 82 | 
 83 |     // Validate environment variables
 84 |     if (!validateEnvVars()) {
 85 |         process.exit(1);
 86 |     }
 87 | 
 88 |     log.info(`Loaded ${testCases.length} test cases`);
 89 | 
 90 |     // Convert to format expected by Phoenix
 91 |     const examples = testCases.map((testCase) => ({
 92 |         input: { query: testCase.query, context: testCase.context || '' },
 93 |         output: { expectedTools: testCase.expectedTools?.join(', '), reference: testCase.reference || '' },
 94 |         metadata: { category: testCase.category },
 95 |     }));
 96 | 
 97 |     // Initialize Phoenix client
 98 |     const client = createClient({
 99 |         options: {
100 |             baseUrl: process.env.PHOENIX_BASE_URL!,
101 |             headers: { Authorization: `Bearer ${sanitizeHeaderValue(process.env.PHOENIX_API_KEY)}` },
102 |         },
103 |     });
104 | 
105 |     log.info(`Uploading dataset '${datasetName}' to Phoenix...`);
106 | 
107 |     try {
108 |         const { datasetId } = await createDataset({
109 |             client,
110 |             name: datasetName,
111 |             description: `MCP server dataset: version ${version}`,
112 |             examples,
113 |         });
114 | 
115 |         log.info(`Dataset '${datasetName}' created with ID: ${datasetId}`);
116 |     } catch (error) {
117 |         if (error instanceof Error && error.message.includes('409')) {
118 |             log.error(`❌ Dataset '${datasetName}' already exists in Phoenix!`);
119 |             log.error('');
120 |             log.error('💡 Solutions:');
121 |             log.error('  1. Use --dataset-name to specify a different name:');
122 |             log.error(`     tsx create-dataset.ts --dataset-name ${datasetName}_v2`);
123 |             log.error(`     npm run evals:create-dataset -- --dataset-name ${datasetName}_v2`);
124 |             log.error('  2. Delete the existing dataset from Phoenix dashboard first');
125 |             log.error('');
126 |             log.error(`📋 Technical details: ${error.message}`);
127 |         } else {
128 |             log.error(`Error creating dataset: ${error}`);
129 |         }
130 |         process.exit(1);
131 |     }
132 | }
133 | 
/**
 * Script entry point: loads the test cases, applies the optional category and ID filters,
 * resolves the dataset name and uploads the dataset. Any error thrown on the way is logged
 * and ends the process with exit code 1.
 */
async function main(): Promise<void> {
    try {
        // Test cases come from the file given on the command line, or from the default file
        const testCasesPath = argv.testCases || 'test-cases.json';
        const testData = loadTestCases(testCasesPath);
        let selectedCases = testData.testCases;

        // Optional narrowing by category
        const { category, id: idPattern } = argv;
        if (category) {
            selectedCases = filterByCategory(selectedCases, category);
            log.info(`Filtered to ${selectedCases.length} test cases in category '${category}'`);
        }

        // Optional narrowing by ID pattern
        if (idPattern) {
            selectedCases = filterById(selectedCases, idPattern);
            log.info(`Filtered to ${selectedCases.length} test cases matching ID pattern '${idPattern}'`);
        }

        // An explicit name wins over the one derived from the test data version
        const datasetName = argv.datasetName || `mcp_server_dataset_v${testData.version}`;

        await createDatasetFromTestCases(selectedCases, datasetName, testData.version);
    } catch (error) {
        log.error('Unexpected error:', { error });
        process.exit(1);
    }
}
164 | 
// Start the script and exit explicitly once main() has finished
const scriptRun = main().then(() => process.exit());

// Anything rejected along the way is logged and reported through exit code 1
scriptRun.catch((err) => {
    log.error('Unexpected error:', err);
    process.exit(1);
});
172 | 


--------------------------------------------------------------------------------
/src/utils/apify-docs.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Utilities for searching Apify documentation using Algolia.
  3 |  *
  4 |  * Provides a function to query the Apify docs via Algolia's search API and return structured results.
  5 |  *
  6 |  * @module utils/apify-docs
  7 |  */
  8 | import { algoliasearch } from 'algoliasearch';
  9 | 
 10 | import log from '@apify/log';
 11 | 
 12 | import { DOCS_SOURCES } from '../const.js';
 13 | import { searchApifyDocsCache } from '../state.js';
 14 | import type { ApifyDocsSearchResult } from '../types.js';
 15 | 
 16 | /**
 17 |  * Pool of Algolia search clients, keyed by app ID to handle multiple Algolia accounts.
 18 |  */
 19 | const clientPool: Record<string, ReturnType<typeof algoliasearch>> = {};
 20 | 
 21 | function getAlgoliaClient(appId: string, apiKey: string) {
 22 |     if (!clientPool[appId]) {
 23 |         clientPool[appId] = algoliasearch(appId, apiKey);
 24 |     }
 25 |     return clientPool[appId];
 26 | }
 27 | 
 28 | /**
 29 |  * Represents a single search hit from Algolia's response.
 30 |  */
 31 | type AlgoliaResultHit = {
 32 |     url_without_anchor?: string;
 33 |     anchor?: string;
 34 |     content?: string | null;
 35 |     type?: string;
 36 |     hierarchy?: Record<string, string | null>;
 37 | };
 38 | 
 39 | /**
 40 |  * Represents a single Algolia search result containing hits.
 41 |  */
 42 | type AlgoliaResult = {
 43 |     hits?: AlgoliaResultHit[];
 44 | };
 45 | 
 46 | /**
 47 |  * Builds an Algolia search request with conditional filters based on documentation source configuration.
 48 |  *
 49 |  * @param {object} indexConfig - The documentation source configuration from DOCS_SOURCES
 50 |  * @param {string} query - The search query string
 51 |  * @returns {object} Algolia search request object with index name, query, and conditional filters
 52 |  */
 53 | function prepareAlgoliaRequest(
 54 |     indexConfig: (typeof DOCS_SOURCES)[number],
 55 |     query: string,
 56 |     // eslint-disable-next-line @typescript-eslint/no-explicit-any
 57 | ): any {
 58 |     // eslint-disable-next-line @typescript-eslint/no-explicit-any
 59 |     const searchRequest: any = {
 60 |         indexName: indexConfig.indexName,
 61 |         query: query.trim(),
 62 |     };
 63 | 
 64 |     // Apply filters if configured
 65 |     if ('filters' in indexConfig && indexConfig.filters) {
 66 |         searchRequest.filters = indexConfig.filters;
 67 |     }
 68 | 
 69 |     // Apply type filter if configured (e.g., for Crawlee to filter to lvl1 pages only)
 70 |     if ('typeFilter' in indexConfig && indexConfig.typeFilter) {
 71 |         const typeFilter = `type:${indexConfig.typeFilter}`;
 72 |         if (searchRequest.filters) {
 73 |             // Combine with existing filters using AND
 74 |             searchRequest.filters = `${searchRequest.filters} AND ${typeFilter}`;
 75 |         } else {
 76 |             searchRequest.filters = typeFilter;
 77 |         }
 78 |     }
 79 | 
 80 |     // Apply facet filters if configured
 81 |     if ('facetFilters' in indexConfig && indexConfig.facetFilters) {
 82 |         searchRequest.facetFilters = indexConfig.facetFilters;
 83 |     }
 84 | 
 85 |     return searchRequest;
 86 | }
 87 | 
 88 | /**
 89 |  * Processes Algolia search response and transforms hits into ApifyDocsSearchResult array.
 90 |  *
 91 |  * @param {AlgoliaResult[]} results - Raw Algolia search results
 92 |  * @returns {ApifyDocsSearchResult[]} Processed search results with URL (may include anchor) and optional content
 93 |  */
 94 | function processAlgoliaResponse(results: AlgoliaResult[]): ApifyDocsSearchResult[] {
 95 |     const searchResults: ApifyDocsSearchResult[] = [];
 96 | 
 97 |     for (const result of results) {
 98 |         if (!result.hits?.length) {
 99 |             continue;
100 |         }
101 | 
102 |         for (const hit of result.hits) {
103 |             if (!hit.url_without_anchor) {
104 |                 continue;
105 |             }
106 | 
107 |             // Build URL with anchor if present
108 |             let url = hit.url_without_anchor;
109 |             if (hit.anchor && hit.anchor.trim()) {
110 |                 url += `#${hit.anchor}`;
111 |             }
112 | 
113 |             searchResults.push({
114 |                 url,
115 |                 ...(hit.content ? { content: hit.content } : {}),
116 |             });
117 |         }
118 |     }
119 | 
120 |     return searchResults;
121 | }
122 | 
/**
 * Runs an Algolia search against one documentation source, looked up in DOCS_SOURCES by ID.
 *
 * @param {string} docSource - The documentation source ID ('apify', 'crawlee-js', or 'crawlee-py').
 * @param {string} query - The search query string.
 * @returns {Promise<ApifyDocsSearchResult[]>} Array of search results with URL (may include anchor) and optional content.
 * @throws {Error} When no documentation source with the given ID is configured.
 */
export async function searchDocsBySource(
    docSource: string,
    query: string,
): Promise<ApifyDocsSearchResult[]> {
    const indexConfig = DOCS_SOURCES.find(({ id }) => id === docSource);
    if (!indexConfig) {
        throw new Error(`Unknown documentation source: ${docSource}`);
    }

    // One request per call, sent through the pooled client for this source's credentials
    const { appId, apiKey } = indexConfig;
    const { results } = await getAlgoliaClient(appId, apiKey).search({
        requests: [prepareAlgoliaRequest(indexConfig, query)],
    });

    const searchResults = processAlgoliaResponse(results as unknown as AlgoliaResult[]);

    log.info(`[Algolia] Search completed successfully. Found ${searchResults.length} results for "${docSource}"`);
    return searchResults;
}
153 | 
/**
 * Cached variant of searchDocsBySource.
 *
 * Results are stored in searchApifyDocsCache under the source ID combined with the trimmed,
 * lower-cased query, so queries differing only in case or surrounding whitespace share an entry.
 *
 * @param {string} docSource - The documentation source ID ('apify', 'crawlee-js', or 'crawlee-py').
 * @param {string} query - The search query string.
 * @returns {Promise<ApifyDocsSearchResult[]>} Array of search results with URL (may include anchor) and optional content.
 */
export async function searchDocsBySourceCached(
    docSource: string,
    query: string,
): Promise<ApifyDocsSearchResult[]> {
    const normalizedQuery = query.trim().toLowerCase();
    const cacheKey = `${docSource}::${normalizedQuery}`;

    const cached = searchApifyDocsCache.get(cacheKey);
    if (!cached) {
        log.debug(`[Algolia] Cache miss for key: "${cacheKey}". Executing search...`);
        // The search itself receives the query exactly as given by the caller
        const fresh = await searchDocsBySource(docSource, query);
        searchApifyDocsCache.set(cacheKey, fresh);
        return fresh;
    }

    log.debug(`[Algolia] Cache hit for key: "${cacheKey}". Returning ${cached.length} cached results`);
    return cached;
}
177 | 


--------------------------------------------------------------------------------