├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── deploy
    ├── README.md
    ├── docker-compose.yaml
    └── searxng
    │   ├── settings.yml
    │   └── uwsgi.ini
├── dist
    ├── index.cjs
    ├── index.cjs.map
    ├── index.d.cts
    ├── index.d.ts
    ├── index.js
    └── index.js.map
├── eslint.config.mjs
├── package.json
├── smithery.yaml
├── src
    ├── global.d.ts
    ├── index.ts
    ├── interface.ts
    ├── libs
    │   ├── browser-search
    │   │   ├── engines
    │   │   │   ├── baidu.ts
    │   │   │   ├── bing.ts
    │   │   │   ├── get.ts
    │   │   │   ├── google.ts
    │   │   │   ├── index.ts
    │   │   │   └── sogou.ts
    │   │   ├── index.ts
    │   │   ├── queue.ts
    │   │   ├── readability.ts
    │   │   ├── search.ts
    │   │   ├── types.ts
    │   │   └── utils.ts
    │   └── browser
    │   │   ├── base.ts
    │   │   ├── finder.ts
    │   │   ├── index.ts
    │   │   ├── local.ts
    │   │   ├── remote.ts
    │   │   └── types.ts
    ├── search
    │   ├── bing.ts
    │   ├── duckduckgo.ts
    │   ├── index.ts
    │   ├── local.ts
    │   ├── searxng.ts
    │   └── tavily.ts
    └── tools.ts
└── tsconfig.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependency directories
 2 | node_modules/
 3 | package-lock.json
 4 | 
 5 | # Log files
 6 | logs/
 7 | *.log
 8 | npm-debug.log*
 9 | yarn-debug.log*
10 | yarn-error.log*
11 | 
12 | # ESLint
13 | .eslintcache
14 | 
15 | # Runtime data
16 | .DS_Store
17 | .env.local
18 | .env.development.local
19 | .env.test.local
20 | .env.production.local
21 | 
22 | # Editor directories and files
23 | .idea/
24 | .vscode/
25 | *.swp
26 | *.swo
27 | 
28 | # TypeScript cache
29 | *.tsbuildinfo
30 | 
31 | # Coverage directory
32 | coverage/
33 | 
34 | # Temporary files
35 | tmp/
36 | temp/ 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
 2 | FROM node:lts-alpine
 3 | 
 4 | # Set working directory
 5 | WORKDIR /app
 6 | 
 7 | # Copy package files (the trailing * lets the build proceed when package-lock.json is absent)
 8 | COPY package.json package-lock.json* ./
 9 | 
10 | # Install dependencies; --ignore-scripts skips package lifecycle scripts to speed up the build
11 | RUN npm install --ignore-scripts
12 | 
13 | # Copy remaining source code
14 | COPY . .
15 | 
16 | # Build the project
17 | RUN npm run build
18 | 
19 | # Expose port if needed (not required for MCP using stdio, but helpful for debugging)
20 | # EXPOSE 3000
21 | # NOTE(review): no step above installs a browser, while the README says local search needs one - confirm SEARCH_PROVIDER=local is not expected to work in this image
22 | # Command to run the MCP server
23 | CMD ["node", "dist/index.js"]
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 zac_ma.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🚀 OneSearch MCP Server: Web Search & Crawl & Scraper & Extract
  2 | 
  3 | A Model Context Protocol (MCP) server implementation that integrates with SearXNG/Tavily/DuckDuckGo/Bing for web search, local browser search, and scraping capabilities with Firecrawl.
  4 | 
  5 | ## Features
  6 | 
  7 | - Search the web, and scrape, crawl, and extract content from websites.
  8 | - Support multiple search engines and web scrapers: **SearXNG**, **Firecrawl**, **Tavily**, **DuckDuckGo**, **Bing**, etc.
  9 | - **Local web search** (browser search) with support for multiple search engines: **Bing**, **Google**, **Baidu**, **Sogou**, etc.
 10 |   - Use `puppeteer-core` to scrape content from websites.
 11 |   - You should have a local browser installed, such as `Chromium`, `Google Chrome`, `Google Chrome Canary`, etc.
 12 |   - Free, no keys required.
 13 | - **Enabled tools:** `one_search`, `one_scrape`, `one_map`
 14 | - Supports self-hosted services: SearXNG, Firecrawl, etc. (see [Deploy](./deploy/README.md))
 15 | 
 16 | ## Installation
 17 | 
 18 | ### Installing via Smithery
 19 | 
 20 | To install OneSearch for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@yokingma/one-search):
 21 | 
 22 | ```bash
 23 | npx -y @smithery/cli install @yokingma/one-search --client claude
 24 | ```
 25 | 
 26 | ### Manual Installation
 27 | 
 28 | ```shell
 29 | # Manually install (Optional)
 30 | npm install -g one-search-mcp
 31 | ```
 32 | 
 33 | ```shell
 34 | # using npx
 35 | env SEARCH_API_URL=http://127.0.0.1:8080 FIRECRAWL_API_URL=http://127.0.0.1:3002 npx -y one-search-mcp
 36 | ```
 37 | 
 38 | ## Environment Variables
 39 | 
 40 | **Search Engine:**
 41 | 
 42 | - **SEARCH_PROVIDER** (Optional): The search provider to use, supports `searxng`, `duckduckgo`, `bing`, `tavily`, `local`, default is `local`.
 43 | - **SEARCH_API_URL** (Optional): The URL of the SearXNG API, required for `searxng`.
 44 | - **SEARCH_API_KEY** (Optional): The API key for the search provider, required for `tavily`, `bing`.
 45 | 
 46 | ```ts
 47 | // supported search providers
 48 | export type SearchProvider = 'searxng' | 'duckduckgo' | 'bing' | 'tavily' | 'local';
 49 | ```
 50 | 
 51 | **Firecrawl:**
 52 | 
 53 | - **FIRECRAWL_API_URL** (Optional): The URL of the Firecrawl API, required for `firecrawl`.
 54 | - **FIRECRAWL_API_KEY** (Optional): The API key for the Firecrawl API, required for `firecrawl` if using the cloud service.
 55 | 
 56 | ## Running on Cursor
 57 | 
 58 | Your `mcp.json` file will look like this:
 59 | 
 60 | ```json
 61 | {
 62 |   "mcpServers": {
 63 |     "one-search-mcp": {
 64 |       "command": "npx",
 65 |       "args": ["-y", "one-search-mcp"],
 66 |       "env": {
 67 |         "SEARCH_PROVIDER": "searxng",
 68 |         "SEARCH_API_URL": "http://127.0.0.1:8080",
 69 |         "SEARCH_API_KEY": "YOUR_API_KEY",
 70 |         "FIRECRAWL_API_URL": "http://127.0.0.1:3002",
 71 |         "FIRECRAWL_API_KEY": "YOUR_API_KEY"
 72 |       }
 73 |     }
 74 |   }
 75 | }
 76 | ```
 77 | 
 78 | ## Running on Windsurf
 79 | 
 80 | Add this to your `~/.codeium/windsurf/mcp_config.json` file:
 81 | 
 82 | ```json
 83 | {
 84 |   "mcpServers": {
 85 |     "one-search-mcp": {
 86 |       "command": "npx",
 87 |       "args": ["-y", "one-search-mcp"],
 88 |       "env": {
 89 |         "SEARCH_PROVIDER": "searxng",
 90 |         "SEARCH_API_URL": "http://127.0.0.1:8080",
 91 |         "SEARCH_API_KEY": "YOUR_API_KEY",
 92 |         "FIRECRAWL_API_URL": "http://127.0.0.1:3002",
 93 |         "FIRECRAWL_API_KEY": "YOUR_API_KEY"
 94 |       }
 95 |     }
 96 |   }
 97 | }
 98 | ```
 99 | 
100 | ## Self-host
101 | 
102 | For local deployment of SearXNG and Firecrawl, please refer to [Deploy](./deploy/README.md).
103 | 
104 | ## Troubleshooting
105 | 
106 | - `[ReferenceError]: __name is not defined`: This happens because Puppeteer has problems with `tsx`; see [esbuild#1031](https://github.com/evanw/esbuild/issues/1031).
107 | 
108 | ## License
109 | 
110 | MIT License - see [LICENSE](./LICENSE) file for details.
111 | 


--------------------------------------------------------------------------------
/deploy/README.md:
--------------------------------------------------------------------------------
 1 | # Self-hosting Guide (using Docker)
 2 | 
 3 | This document mainly explains how to deploy SearXNG and Firecrawl locally using Docker. You can also use other methods such as APIs provided by cloud services.
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | Before we dive in, make sure you have:
 8 | 
 9 | - Docker installed and running (version 20.10.0 or higher)
10 | - At least 4GB of RAM available for the container
11 | 
12 | > Pro tip: Run `docker info` to check your Docker installation and available resources.
13 | 
14 | ## How to deploy
15 | 
16 | ```bash
17 | git clone https://github.com/yokingma/one-search-mcp.git
18 | cd one-search-mcp/deploy
19 | docker compose up -d
20 | ```
21 | 
22 | Then you can access the services at:
23 | 
24 | - `http://127.0.0.1:8080` for SearXNG
25 | - `http://127.0.0.1:3002` for Firecrawl
26 | 
27 | > Pro tip: If you want to change the port, you can modify the `docker-compose.yaml` file.
28 | 
29 | ## SearXNG (Self-host)
30 | 
31 | Create a new SearXNG instance using Docker, for details see [searxng-docker](https://github.com/searxng/searxng-docker).
32 | 
33 | ## Firecrawl (Self-host)
34 | 
35 | Create a new Firecrawl instance using Docker, for details see [firecrawl-self-host](https://github.com/mendableai/firecrawl/blob/main/SELF_HOST.md).
36 | 


--------------------------------------------------------------------------------
/deploy/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | name: one-search
 2 | # Settings shared by firecrawl-api and firecrawl-worker, merged in below via `<<: *common-service`.
 3 | x-common-service: &common-service
 4 |   image: docker.cnb.cool/aigc/firecrawl
 5 | 
 6 |   ulimits:
 7 |     nofile:
 8 |       soft: 65535
 9 |       hard: 65535
10 |   networks:
11 |     - backend
12 |   extra_hosts:
13 |     - "host.docker.internal:host-gateway"
14 | # Environment shared by firecrawl-api and firecrawl-worker; entries written as ${VAR} without `:-` have no fallback in this file.
15 | x-common-env: &common-env
16 |   REDIS_URL: ${REDIS_URL:-redis://redis:6379}
17 |   REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}  # reads REDIS_URL, not a separate variable
18 |   PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
19 |   USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
20 |   OPENAI_API_KEY: ${OPENAI_API_KEY}
21 |   OPENAI_BASE_URL: ${OPENAI_BASE_URL}
22 |   MODEL_NAME: ${MODEL_NAME}
23 |   SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL}
24 |   BULL_AUTH_KEY: ${BULL_AUTH_KEY}
25 |   TEST_API_KEY: ${TEST_API_KEY}
26 |   POSTHOG_API_KEY: ${POSTHOG_API_KEY}
27 |   POSTHOG_HOST: ${POSTHOG_HOST}
28 |   SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN}
29 |   SUPABASE_URL: ${SUPABASE_URL}
30 |   SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN}
31 |   SCRAPING_BEE_API_KEY: ${SCRAPING_BEE_API_KEY}
32 |   SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL}
33 |   SERPER_API_KEY: ${SERPER_API_KEY}
34 |   SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY}
35 |   LOGGING_LEVEL: ${LOGGING_LEVEL}
36 |   PROXY_SERVER: ${PROXY_SERVER}
37 |   PROXY_USERNAME: ${PROXY_USERNAME}
38 |   PROXY_PASSWORD: ${PROXY_PASSWORD}
39 | 
40 | services:
41 |   searxng:
42 |     image: searxng/searxng:latest
43 |     restart: always
44 |     ports:
45 |       - "127.0.0.1:8080:8080"  # published on the host loopback address only
46 |     volumes:
47 |       - ./searxng:/etc/searxng:rw  # the ./searxng directory next to this file, mounted read-write
48 |     environment:
49 |       - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/
50 |     networks:
51 |       - backend
52 |   # NOTE(review): SEARXNG_BASE_URL above is https://<host>/ while the port is published as 127.0.0.1:8080 - confirm this is intended.
53 |   firecrawl-api:
54 |     <<: *common-service
55 |     environment:
56 |       <<: *common-env
57 |       HOST: "0.0.0.0"
58 |       PORT: ${INTERNAL_PORT:-3002}
59 |       FLY_PROCESS_GROUP: app
60 |     depends_on:
61 |       - playwright-service
62 |       - redis
63 |     ports:
64 |       - "${PORT:-3002}:${INTERNAL_PORT:-3002}"  # NOTE(review): no host IP given, unlike searxng's 127.0.0.1 binding - confirm wider exposure is intended
65 |     command: [ "pnpm", "run", "start:production" ]
66 | 
67 |   firecrawl-worker:
68 |     <<: *common-service
69 |     environment:
70 |       <<: *common-env
71 |       FLY_PROCESS_GROUP: worker
72 |     depends_on:
73 |       - playwright-service
74 |       - firecrawl-api
75 |       - redis
76 |     command: [ "pnpm", "run", "workers" ]
77 | 
78 |   playwright-service:
79 |     image: docker.cnb.cool/aigc/firecrawl/playwright
80 |     environment:
81 |       PORT: 3000  # same port as in the default PLAYWRIGHT_MICROSERVICE_URL above
82 |       PROXY_SERVER: ${PROXY_SERVER}
83 |       PROXY_USERNAME: ${PROXY_USERNAME}
84 |       PROXY_PASSWORD: ${PROXY_PASSWORD}
85 |       BLOCK_MEDIA: ${BLOCK_MEDIA}
86 |     networks:
87 |       - backend
88 | 
89 |   redis:
90 |     image: redis:alpine
91 |     networks:
92 |       - backend
93 |     command: redis-server --bind 0.0.0.0
94 | 
95 | networks:
96 |   backend:
97 |     driver: bridge


--------------------------------------------------------------------------------
/deploy/searxng/uwsgi.ini:
--------------------------------------------------------------------------------
 1 | [uwsgi]
 2 | # User and group that run the code
 3 | uid = searxng
 4 | gid = searxng
 5 | 
 6 | # Number of workers (usually CPU count)
 7 | # default value: %k (= number of CPU cores; "see Dockerfile" presumably means the SearXNG image's Dockerfile - confirm)
 8 | workers = %k
 9 | 
10 | # Number of threads per worker
11 | # default value: 4 (see Dockerfile)
12 | threads = 4
13 | 
14 | # Permissions granted on the created socket
15 | chmod-socket = 666
16 | 
17 | # Plugin to use and interpreter config
18 | single-interpreter = true
19 | master = true
20 | plugin = python3
21 | lazy-apps = true
22 | enable-threads = 4
23 | 
24 | # Module to import
25 | module = searx.webapp
26 | 
27 | # Virtualenv and python path
28 | pythonpath = /usr/local/searxng/
29 | chdir = /usr/local/searxng/searx/
30 | 
31 | # Automatically set process names to something meaningful
32 | auto-procname = true
33 | 
34 | # Disable request logging for privacy (log-5xx is still switched on below)
35 | disable-logging = true
36 | log-5xx = true
37 | 
38 | # Set the max size of a request (request-body excluded)
39 | buffer-size = 8192
40 | 
41 | # No keep alive
42 | # See https://github.com/searx/searx-docker/issues/24
43 | add-header = Connection: close
44 | 
45 | # Follow SIGTERM convention
46 | # See https://github.com/searxng/searxng/issues/3427
47 | die-on-term
48 | 
49 | # uwsgi serves the static files
50 | static-map = /static=/usr/local/searxng/searx/static
51 | # expires set to one day (86400 seconds)
52 | static-expires = /* 86400
53 | static-gzip-all = True
54 | offload-threads = 4
55 | 


--------------------------------------------------------------------------------
/dist/index.d.cts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | import AsyncRetry from 'async-retry';
 3 | 
 4 | interface IMediaItem { // image/video attachment of a search result
 5 |     thumbnail?: string; // the SearXNG provider fills this from `thumbnail_src`
 6 |     src?: string; // the SearXNG provider fills this from `img_src` (image) or `iframe_src` (video)
 7 | }
 8 | interface ISearchRequestOptions { // options accepted by the search provider functions
 9 |     query: string; // search query text
10 |     page?: number; // 1-based page number; the Bing and SearXNG providers default to 1
11 |     limit?: number; // maximum number of results; Bing, SearXNG and Tavily default to 10
12 |     categories?: string; // SearXNG `categories` (default "general"); also passed to Tavily as `topic`
13 |     format?: string; // SearXNG response format; default "json"
14 |     language?: string; // SearXNG `language` (default "auto"); sent to Bing as `mkt`
15 |     engines?: string; // SearXNG `engines`; default "all"
16 |     safeSearch?: 0 | 1 | 2; // the Bing provider maps 0/1/2 to Off/Moderate/Strict
17 |     timeRange?: string; // sent to SearXNG as `time_range` and to Tavily as `timeRange`
18 |     timeout?: number | string; // SearXNG: abort delay, converted with Number() for setTimeout (default 10000); DuckDuckGo: `response_timeout` (default 10000)
19 |     apiKey?: string; // required by the Bing and Tavily providers; optional Bearer token for SearXNG
20 |     apiUrl?: string; // required by the SearXNG provider; Bing defaults to https://api.bing.microsoft.com/v7.0/search
21 |     retry?: AsyncRetry.Options; // async-retry options; the DuckDuckGo provider defaults to { retries: 3 }
22 | }
23 | interface ISearchResponseResult { // one normalized search hit
24 |     title: string;
25 |     snippet: string; // provider text: Bing `snippet`, DuckDuckGo `description`, SearXNG/Tavily `content`
26 |     url: string;
27 |     thumbnailUrl?: string; // set by the Bing provider
28 |     markdown?: string; // not set by any provider in the bundled code visible here - TODO confirm where it is filled
29 |     source?: string; // Bing `siteName`, DuckDuckGo `hostname`, SearXNG `source`
30 |     engine?: string; // "bing", "duckduckgo", "tavily", or the engine reported by SearXNG
31 |     image?: IMediaItem | null; // null unless a SearXNG result carries `img_src`
32 |     video?: IMediaItem | null; // null unless a SearXNG result carries `iframe_src`
33 | }
34 | interface ISearchResponse { // value returned by every search provider
35 |     results: ISearchResponseResult[];
36 |     success: boolean; // the SearXNG provider returns false when the response has no `results`
37 | }
38 | type SearchProvider = 'searxng' | 'duckduckgo' | 'bing' | 'tavily' | 'local'; // same values the README lists for SEARCH_PROVIDER
39 | type SearchTimeRange = 'year' | 'month' | 'week' | 'day';
40 | 
41 | export type { IMediaItem, ISearchRequestOptions, ISearchResponse, ISearchResponseResult, SearchProvider, SearchTimeRange };
42 | 


--------------------------------------------------------------------------------
/dist/index.d.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | import AsyncRetry from 'async-retry';
 3 | 
 4 | interface IMediaItem { // image/video attachment of a search result
 5 |     thumbnail?: string; // the SearXNG provider fills this from `thumbnail_src`
 6 |     src?: string; // the SearXNG provider fills this from `img_src` (image) or `iframe_src` (video)
 7 | }
 8 | interface ISearchRequestOptions { // options accepted by the search provider functions
 9 |     query: string; // search query text
10 |     page?: number; // 1-based page number; the Bing and SearXNG providers default to 1
11 |     limit?: number; // maximum number of results; Bing, SearXNG and Tavily default to 10
12 |     categories?: string; // SearXNG `categories` (default "general"); also passed to Tavily as `topic`
13 |     format?: string; // SearXNG response format; default "json"
14 |     language?: string; // SearXNG `language` (default "auto"); sent to Bing as `mkt`
15 |     engines?: string; // SearXNG `engines`; default "all"
16 |     safeSearch?: 0 | 1 | 2; // the Bing provider maps 0/1/2 to Off/Moderate/Strict
17 |     timeRange?: string; // sent to SearXNG as `time_range` and to Tavily as `timeRange`
18 |     timeout?: number | string; // SearXNG: abort delay, converted with Number() for setTimeout (default 10000); DuckDuckGo: `response_timeout` (default 10000)
19 |     apiKey?: string; // required by the Bing and Tavily providers; optional Bearer token for SearXNG
20 |     apiUrl?: string; // required by the SearXNG provider; Bing defaults to https://api.bing.microsoft.com/v7.0/search
21 |     retry?: AsyncRetry.Options; // async-retry options; the DuckDuckGo provider defaults to { retries: 3 }
22 | }
23 | interface ISearchResponseResult { // one normalized search hit
24 |     title: string;
25 |     snippet: string; // provider text: Bing `snippet`, DuckDuckGo `description`, SearXNG/Tavily `content`
26 |     url: string;
27 |     thumbnailUrl?: string; // set by the Bing provider
28 |     markdown?: string; // not set by any provider in the bundled code visible here - TODO confirm where it is filled
29 |     source?: string; // Bing `siteName`, DuckDuckGo `hostname`, SearXNG `source`
30 |     engine?: string; // "bing", "duckduckgo", "tavily", or the engine reported by SearXNG
31 |     image?: IMediaItem | null; // null unless a SearXNG result carries `img_src`
32 |     video?: IMediaItem | null; // null unless a SearXNG result carries `iframe_src`
33 | }
34 | interface ISearchResponse { // value returned by every search provider
35 |     results: ISearchResponseResult[];
36 |     success: boolean; // the SearXNG provider returns false when the response has no `results`
37 | }
38 | type SearchProvider = 'searxng' | 'duckduckgo' | 'bing' | 'tavily' | 'local'; // same values the README lists for SEARCH_PROVIDER
39 | type SearchTimeRange = 'year' | 'month' | 'week' | 'day';
40 | 
41 | export type { IMediaItem, ISearchRequestOptions, ISearchResponse, ISearchResponseResult, SearchProvider, SearchTimeRange };
42 | 


--------------------------------------------------------------------------------
/dist/index.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | import{Server as Se}from"@modelcontextprotocol/sdk/server/index.js";import{CallToolRequestSchema as Ee,ListToolsRequestSchema as ve}from"@modelcontextprotocol/sdk/types.js";import{StdioServerTransport as Te}from"@modelcontextprotocol/sdk/server/stdio.js";async function $(i){let{query:t,limit:e=10,safeSearch:r=0,page:n=1,apiUrl:a="https://api.bing.microsoft.com/v7.0/search",apiKey:o,language:s}=i,u=["Off","Moderate","Strict"];if(!o)throw new Error("Bing API key is required");let h={q:t,count:e,offset:(n-1)*e,mkt:s,safeSearch:u[r]};try{let c=new URLSearchParams;Object.entries(h).forEach(([d,w])=>{w!==void 0&&c.set(d,w.toString())});let g=await fetch(`${a}?${c}`,{method:"GET",headers:{"Content-Type":"application/json","Ocp-Apim-Subscription-Key":o}});if(!g.ok)throw new Error(`Bing search error: ${g.status} ${g.statusText}`);return{results:(await g.json()).webPages?.value?.map(d=>({title:d.name,snippet:d.snippet,url:d.url,source:d.siteName,thumbnailUrl:d.thumbnailUrl,language:d.language,image:null,video:null,engine:"bing"}))??[],success:!0}}catch(c){let g=c instanceof Error?c.message:"Bing search error.";throw process.stdout.write(g),c}}import*as T from"duck-duck-scrape";import oe from"async-retry";async function F(i){try{let{query:t,timeout:e=1e4,safeSearch:r=T.SafeSearchType.OFF,retry:n={retries:3},...a}=i,o=await oe(()=>T.search(t,{...a,safeSearch:r},{response_timeout:e}),n);return{results:(o?{noResults:o.noResults,vqd:o.vqd,results:o.results}:{noResults:!0,vqd:"",results:[]}).results.map(u=>({title:u.title,snippet:u.description,url:u.url,source:u.hostname,image:null,video:null,engine:"duckduckgo"})),success:!0}}catch(t){let e=t instanceof Error?t.message:"DuckDuckGo search error.";throw process.stdout.write(e),t}}import le from"node:url";async function G(i){try{let{query:t,page:e=1,limit:r=10,categories:n="general",engines:a="all",safeSearch:o=0,format:s="json",language:u="auto",timeRange:h="",timeout:c=1e4,apiKey:g,apiUrl:l}=i;if(!l)throw new Error("SearxNG 
API URL is required");let p=new AbortController,y=setTimeout(()=>p.abort(),Number(c)),d={q:t,pageno:e,categories:n,format:s,safesearch:o,language:u,engines:a,time_range:h},w=`${l}/search`,O=le.format({query:d}),I={"Content-Type":"application/json"};g&&(I.Authorization=`Bearer ${g}`);let ne=await fetch(`${w}${O}`,{method:"POST",headers:I,signal:p.signal});clearTimeout(y);let M=await ne.json();return M.results?{results:M.results.slice(0,r).map(f=>{let se=f.img_src?{thumbnail:f.thumbnail_src,src:f.img_src}:null,ae=f.iframe_src?{thumbnail:f.thumbnail_src,src:f.iframe_src}:null;return{title:f.title,snippet:f.content,url:f.url,source:f.source,image:se,video:ae,engine:f.engine}}),success:!0}:{results:[],success:!1}}catch(t){let e=t instanceof Error?t.message:"Searxng search error.";throw process.stdout.write(e),t}}import{tavily as ce}from"@tavily/core";async function q(i){let{query:t,limit:e=10,categories:r="general",timeRange:n,apiKey:a}=i;if(!a)throw new Error("Tavily API key is required");try{let o=ce({apiKey:a}),s={topic:r,timeRange:n,maxResults:e};return{results:(await o.search(t,s)).results.map(c=>({title:c.title,url:c.url,snippet:c.content,engine:"tavily"})),success:!0}}catch(o){let s=o instanceof Error?o.message:"Tavily search error.";throw process.stdout.write(s),o}}import{Page as tt}from"puppeteer-core";import*as _ from"fs";import*as D from"path";import*as H from"os";import{defaultLogger as ue}from"@agent-infra/logger";var N=class{logger;constructor(t){this.logger=t??ue}get browsers(){let t=H.homedir(),e=process.env.LOCALAPPDATA;return[{name:"Chromium",executable:{win32:"C:\\Program Files\\Chromium\\Application\\chrome.exe",darwin:"/Applications/Chromium.app/Contents/MacOS/Chromium",linux:"/usr/bin/chromium"},userDataDir:{win32:`${e}\\Chromium\\User Data`,darwin:`${t}/Library/Application Support/Chromium`,linux:`${t}/.config/chromium`}},{name:"Google Chrome",executable:{win32:"C:\\Program 
Files\\Google\\Chrome\\Application\\chrome.exe",darwin:"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",linux:"/usr/bin/google-chrome"},userDataDir:{win32:`${e}\\Google\\Chrome\\User Data`,darwin:`${t}/Library/Application Support/Google/Chrome`,linux:`${t}/.config/google-chrome`}},{name:"Google Chrome Canary",executable:{win32:"C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe",darwin:"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",linux:"/usr/bin/google-chrome-canary"},userDataDir:{win32:`${e}\\Google\\Chrome Canary\\User Data`,darwin:`${t}/Library/Application Support/Google/Chrome Canary`,linux:`${t}/.config/google-chrome-canary`}}]}findBrowser(t){let e=process.platform;if(this.logger.info("Finding browser on platform:",e),e!=="darwin"&&e!=="win32"&&e!=="linux"){let a=new Error(`Unsupported platform: ${e}`);throw this.logger.error(a.message),a}let r=t?this.browsers.find(a=>a.name===t&&_.existsSync(a.executable[e])):this.browsers.find(a=>_.existsSync(a.executable[e]));if(this.logger.log("browser",r),!r){let a=t?new Error(`Cannot find browser: ${t}`):new Error("Cannot find a supported browser on your system. 
Please install Chrome, Edge, or Brave.");throw this.logger.error(a.message),a}let n={executable:r.executable[e],userDataDir:r.userDataDir[e]};return this.logger.success(`Found browser: ${r.name}`),this.logger.info("Browser details:",n),n}getBrowserProfiles(t){let e=this.findBrowser(t);try{let n=JSON.parse(_.readFileSync(D.join(e.userDataDir,"Local State"),"utf8")).profile.info_cache;return Object.entries(n).map(([a,o])=>({displayName:o.name,path:D.join(e.userDataDir,a)}))}catch{return[]}}findChrome(){try{let{executable:t}=this.findBrowser("Google Chrome");return t}catch{return null}}};import{defaultLogger as he}from"@agent-infra/logger";var S=class{browser=null;logger;activePage=null;constructor(t){this.logger=t?.logger??he,this.logger.info("Browser Options:",t)}getBrowser(){if(!this.browser)throw new Error("Browser not launched");return this.browser}async setupPageListener(){this.browser&&this.browser.on("targetcreated",async t=>{let e=await t.page();e&&(this.logger.info("New page created:",await e.url()),this.activePage=e,e.once("close",()=>{this.activePage===e&&(this.activePage=null)}),e.once("error",()=>{this.activePage===e&&(this.activePage=null)}))})}async close(){this.logger.info("Closing browser");try{await this.browser?.close(),this.browser=null,this.logger.success("Browser closed successfully")}catch(t){throw this.logger.error("Failed to close browser:",t),t}}async evaluateOnNewPage(t){let{url:e,pageFunction:r,pageFunctionParams:n,beforePageLoad:a,afterPageLoad:o,beforeSendResult:s,waitForOptions:u}=t,h=await this.browser.newPage();try{await a?.(h),await h.goto(e,{waitUntil:"networkidle2",...u}),await o?.(h);let c=await h.evaluateHandle(()=>window),g=await h.evaluate(r,c,...n);return await s?.(h,g),await c.dispose(),await h.close(),g}catch(c){throw await h.close(),c}}async createPage(){if(!this.browser)throw this.logger.error("No active browser"),new Error("Browser not launched");return await this.browser.newPage()}async 
getActivePage(){if(!this.browser)throw new Error("Browser not launched");if(this.activePage)try{return await this.activePage.evaluate(()=>document.readyState),this.activePage}catch(e){this.logger.warn("Active page no longer available:",e),this.activePage=null}let t=await this.browser.pages();if(t.length===0)return this.activePage=await this.createPage(),this.activePage;for(let e=t.length-1;e>=0;e--){let r=t[e];try{return await r.evaluate(()=>document.readyState),this.activePage=r,r}catch{continue}}throw new Error("No active page found")}};import*as j from"puppeteer-core";var A=class extends S{browserFinder=new N;async launch(t={}){this.logger.info("Launching browser with options:",t);let e=t?.executablePath||this.browserFinder.findBrowser().executable;this.logger.info("Using executable path:",e);let r=t?.defaultViewport?.width??1280,n=t?.defaultViewport?.height??800,a={executablePath:e,headless:t?.headless??!1,defaultViewport:{width:r,height:n},args:["--no-sandbox","--mute-audio","--disable-gpu","--disable-http2","--disable-blink-features=AutomationControlled","--disable-infobars","--disable-background-timer-throttling","--disable-popup-blocking","--disable-backgrounding-occluded-windows","--disable-renderer-backgrounding","--disable-window-activation","--disable-focus-on-load","--no-default-browser-check","--disable-web-security","--disable-features=IsolateOrigins,site-per-process","--disable-site-isolation-trials",`--window-size=${r},${n+90}`,t?.proxy?`--proxy-server=${t.proxy}`:"",t?.profilePath?`--profile-directory=${t.profilePath}`:""].filter(Boolean),ignoreDefaultArgs:["--enable-automation"],timeout:t.timeout??0,downloadBehavior:{policy:"deny"}};this.logger.info("Launch options:",a);try{this.browser=await j.launch(a),await this.setupPageListener(),this.logger.success("Browser launched successfully")}catch(o){throw this.logger.error("Failed to launch browser:",o),o}}};import*as ge from"puppeteer-core";var W='function 
q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|\xBB([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|\xAB)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPostin
g|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>\xBB] /.test(e))r=/ [\\\\\\/>\xBB] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>\xBB] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>\xBB]*[\\|\\-\\\\\\/>\xBB](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>\xBB]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';import{defaultLogger as we}from"@agent-infra/logger";import pe from"turndown";import{gfm as de}from"turndown-plugin-gfm";import{defaultLogger as 
me}from"@agent-infra/logger";import fe from"user-agents";var ye=i=>{try{return new URL(i)}catch{return null}},V=i=>{let t=ye(i);if(!t)return!0;let{hostname:e}=t;return["reddit.com","www.reddit.com","x.com","twitter.com","www.twitter.com","youtube.com","www.youtube.com"].includes(e)};async function be(i){let t=new fe({deviceCategory:"desktop"}).toString();await i.setBypassCSP(!0),await i.setUserAgent(t),await i.evaluate(()=>{Object.defineProperty(navigator,"webdriver",{get:()=>{}}),Object.defineProperty(navigator,"languages",{get:()=>["en-US","en"]}),Object.defineProperty(navigator,"plugins",{get:()=>[{},{},{},{},{}]}),Object.defineProperty(navigator,"headless",{get:()=>!1});let e=window.navigator.permissions.query;window.navigator.permissions.query=r=>r.name==="notifications"?Promise.resolve({state:Notification.permission}):e(r)})}async function B(i){await be(i),await i.setRequestInterception(!0),i.on("request",t=>t.resourceType()!=="document"?t.abort():t.isNavigationRequest()?t.continue():t.abort())}function X(i,t){let e=new Function("module",`${t}
 3 | return module.exports`)({}),r=i.document;r.querySelectorAll("script,noscript,style,link,svg,img,video,iframe,canvas,.reflist").forEach(s=>s.remove());let n=new e(r).parse(),a=n?.content||"",o=r.title;return{content:a,title:n?.title||o}}function K(i,t={}){if(!i)return"";try{let{codeBlockStyle:e="fenced",headingStyle:r="atx",emDelimiter:n="*",strongDelimiter:a="**",gfmExtension:o=!0}=t,s=new pe({codeBlockStyle:e,headingStyle:r,emDelimiter:n,strongDelimiter:a});return o&&s.use(de),s.turndown(i)}catch(e){return me.error("Error converting HTML to Markdown:",e),i}}var x=class{queue=[];concurrency;running=0;results=[];constructor(t=1){this.concurrency=t}add(t){return new Promise((e,r)=>{this.queue.push(async()=>{try{let n=await t();return e(n),n}catch(n){throw r(n),n}}),this.run()})}async run(){if(this.running>=this.concurrency||this.queue.length===0)return;this.running++;let t=this.queue.shift();try{let e=await t();this.results.push(e)}catch{}finally{this.running--,this.run()}}async waitAll(){for(;this.running>0||this.queue.length>0;)await new Promise(t=>setTimeout(t,100));return this.results}};var E=class{getSearchUrl(t,e){return`https://www.bing.com/search?${new URLSearchParams({q:`${e.excludeDomains&&e.excludeDomains.length>0?`${e.excludeDomains.map(n=>`-site:${n}`).join(" ")} `:""}${t}`,count:`${e.count||10}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document,n=o=>{try{return new URL(o),!0}catch{return!1}},a=o=>{let s=o.cloneNode(!0);return s.querySelectorAll("h2").forEach(l=>l.remove()),s.querySelectorAll(".b_attribution").forEach(l=>l.remove()),s.querySelectorAll("script, style").forEach(l=>l.remove()),Array.from(s.querySelectorAll("*")).filter(l=>l.textContent?.trim()).map(l=>l.textContent?.trim()).filter(Boolean).reduce((l,p)=>(l.some(y=>y.includes(p)||p.includes(y))||l.push(p),l),[]).join(" ").trim().replace(/\s+/g," ")};try{r.querySelectorAll(".b_algo").forEach(s=>{let u=s.querySelector("h2"),c=s.querySelector("h2 
a")?.getAttribute("href"),g=a(s);if(!c||!n(c))return;let l={title:u?.textContent||"",snippet:g,url:c,content:""};!l.title||!l.url||e.push(l)})}catch(o){throw console.error("Error extracting search results from Bing:",o),o}return e}async waitForSearchResults(t,e){await t.waitForSelector("#b_results",{timeout:e??1e4})}};var L=class{getSearchUrl(t,e){let r=e.excludeDomains&&e.excludeDomains.length>0?e.excludeDomains.map(a=>`-site:${a}`).join(" "):"";return`https://www.baidu.com/s?${new URLSearchParams({wd:r?`${r} ${t}`:t,rn:`${e.count||10}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document;try{r.querySelectorAll(".result").forEach(a=>{let o=a.querySelector(".t a"),s=o?.getAttribute("href"),u=a.querySelector(".c-span-last .content-right_2s-H4");if(!s)return;let h={title:o?.textContent||"",url:s,snippet:u?.textContent||"",content:""};!h.title||!h.url||e.push(h)})}catch(n){console.error("Error extracting search results from Baidu:",n)}return e}async waitForSearchResults(t,e){await t.waitForSelector("#page",{timeout:e??1e4})}};var P=class{getSearchUrl(t,e){let{count:r=10,excludeDomains:n=[]}=e,a=n&&n.length>0?n.map(s=>`-site:${s}`).join(" "):"";return`https://www.sogou.com/web?${new URLSearchParams({query:`${a?`${a} `:""}${t}`,num:`${r}`}).toString()}`}extractSearchResults(t){let e=[],r=t.document,n=s=>{try{return new URL(s),!0}catch{return!1}},a="https://www.sogou.com",o={results:".results .vrwrap",resultTitle:".vr-title",resultLink:".vr-title > a",resultSnippet:[".star-wiki",".fz-mid",".attribute-centent"],resultSnippetExcluded:[".text-lightgray",".zan-box",".tag-website"],related:"#main .vrwrap.middle-better-hintBox .hint-mid"};try{r.querySelectorAll(o.results).forEach(u=>{let h=u.querySelector(o.resultTitle),c=u.querySelector(o.resultLink)?.getAttribute("href"),l=o.resultSnippet.map(y=>{let d=u.cloneNode(!0);return o.resultSnippetExcluded.forEach(O=>{d.querySelector(O)?.remove()}),d.querySelector(y)?.textContent?.trim()||""}).filter(Boolean).join(" 
").replace(/\s+/g," ").trim();if(c?.includes("http")||(c=`${a}${c}`),!c?.trim()||!n(c))return;let p={title:h?.textContent?.trim()||"",url:c,snippet:l,content:""};!p.title||!p.url||e.push(p)})}catch(s){let u=s instanceof Error?s.message:String(s);throw console.error("Error extracting search results from Sogou:",u),s}return e}async waitForSearchResults(t,e){await t.waitForSelector("#pagebar_container",{timeout:e??1e4})}};var C=class{getSearchUrl(t,e){let r=new URLSearchParams({q:`${e.excludeDomains&&e.excludeDomains.length>0?`${e.excludeDomains.map(n=>`-site:${n}`).join(" ")} `:""}${t}`,num:`${e.count||10}`});return r.set("udm","14"),`https://www.google.com/search?${r.toString()}`}extractSearchResults(t){let e=[],r=t.document,n=o=>{try{return new URL(o),!0}catch{return!1}},a=o=>{let s=o.cloneNode(!0);return s.querySelectorAll("h3").forEach(l=>l.remove()),s.querySelectorAll("cite").forEach(l=>l.remove()),s.querySelectorAll("script, style").forEach(l=>l.remove()),Array.from(s.querySelectorAll("*")).filter(l=>l.textContent?.trim()).map(l=>l.textContent?.trim()).filter(Boolean).reduce((l,p)=>(l.some(y=>y.includes(p)||p.includes(y))||l.push(p),l),[]).join(" ").trim().replace(/\s+/g," ")};try{r.querySelectorAll(".tF2Cxc").forEach(s=>{let u=s.querySelector("h3"),c=s.querySelector("a")?.getAttribute("href"),g=a(s.parentElement||s);if(!c||!n(c))return;let l={title:u?.textContent||"",url:c,snippet:g,content:""};!l.title||!l.url||e.push(l)})}catch(o){console.error(o)}return e}async waitForSearchResults(t,e){await t.waitForSelector("#search",{timeout:e??1e4})}};function k(i){switch(i){case"bing":return new E;case"baidu":return new L;case"sogou":return new P;case"google":return new C;default:return new E}}var R=class{constructor(t={}){this.config=t;this.logger=t?.logger??we,this.browser=t.browser??new A({logger:this.logger}),this.defaultEngine=t.defaultEngine??"bing"}logger;browser;isBrowserOpen=!1;defaultEngine;async perform(t){this.logger.info("Starting search with 
options:",t);let e=Array.isArray(t.query)?t.query:[t.query],r=t.excludeDomains||[],n=t.count&&Math.max(3,Math.floor(t.count/e.length)),a=t.engine||this.defaultEngine;try{this.isBrowserOpen?this.logger.info("Using existing browser instance"):(this.logger.info("Launching browser"),await this.browser.launch(this.config.browserOptions),this.isBrowserOpen=!0);let o=new x(t.concurrency||15),s=new Set,u=await Promise.all(e.map(h=>this.search(this.browser,{query:h,count:n,queue:o,visitedUrls:s,excludeDomains:r,truncate:t.truncate,needVisitedUrls:t.needVisitedUrls,engine:a})));return this.logger.success("Search completed successfully"),u.flat()}catch(o){return this.logger.error("Search failed:",o),[]}finally{!t.keepBrowserOpen&&this.isBrowserOpen&&await this.closeBrowser()}}async closeBrowser(){this.isBrowserOpen&&(this.logger.info("Closing browser"),await this.browser.close(),this.isBrowserOpen=!1)}async search(t,e){let r=k(e.engine),n=r.getSearchUrl(e.query,{count:e.count,excludeDomains:e.excludeDomains});this.logger.info(`Searching with ${e.engine} engine: ${n}`);let a=await t.evaluateOnNewPage({url:n,waitForOptions:{waitUntil:"networkidle2"},pageFunction:r.extractSearchResults,pageFunctionParams:[],beforePageLoad:async s=>{await B(s)},afterPageLoad:async s=>{r.waitForSearchResults&&await r.waitForSearchResults(s,1e4)}});return this.logger.info(`Fetched ${a?.length??0} links`),a=a?.filter(s=>e.visitedUrls.has(s.url)?!1:(e.visitedUrls.add(s.url),!V(s.url)))||[],a.length?(await Promise.allSettled(e.needVisitedUrls?a.map(s=>e.queue.add(()=>this.visitLink(this.browser,s))):a)).map(s=>s.status==="rejected"||!s.value?null:{...s.value,content:e.truncate?s.value.content.slice(0,e.truncate):s.value.content}).filter(s=>s!==null):(this.logger.info("No valid links found"),[])}async visitLink(t,e){try{this.logger.info("Visiting link:",e.url);let r=await t.evaluateOnNewPage({url:e.url,pageFunction:X,pageFunctionParams:[W],beforePageLoad:async n=>{await B(n)}});if(r){let 
n=K(r.content);return{...r,url:e.url,content:n,snippet:e.snippet}}}catch(r){this.logger.error("Failed to visit link:",r)}}};import{ConsoleLogger as _e}from"@agent-infra/logger";var Y=new _e("[LocalSearch]");async function z(i){let{query:t,limit:e=10}=i,{engines:r="all"}=i,n=new R({logger:Y,browserOptions:{headless:!0}});r==="all"&&(r="bing,google,baidu,sogou");try{let a=r.split(",");if(a.length===0)throw new Error("engines is required");let o=[];for(let s of a){let u=await n.perform({query:t,count:e,engine:s,needVisitedUrls:!1});if(u.length>0){o.push(...u);break}}return Y.info(`Found ${o.length} results for ${t}`,o),{results:o,success:!0}}catch(a){let o=a instanceof Error?a.message:"Local search error.";throw process.stdout.write(o),a}finally{await n.closeBrowser()}}var J={name:"one_search",description:"Search and retrieve content from web pages. Returns SERP results by default (url, title, description).",inputSchema:{type:"object",properties:{query:{type:"string",description:"Search query string"},limit:{type:"number",description:"Maximum number of results to return (default: 10)"},language:{type:"string",description:"Language code for search results (default: auto)"},categories:{type:"string",enum:["general","news","images","videos","it","science","map","music","files","social_media"],description:"Categories to search for (default: general)"},timeRange:{type:"string",description:"Time range for search results (default: all)",enum:["all","day","week","month","year"]}},required:["query"]}},Q={name:"one_map",description:"Discover URLs from a starting point. 
Can use both sitemap.xml and HTML link discovery.",inputSchema:{type:"object",properties:{url:{type:"string",description:"Starting URL for URL discovery"},search:{type:"string",description:"Optional search term to filter URLs"},ignoreSitemap:{type:"boolean",description:"Skip sitemap.xml discovery and only use HTML links"},sitemapOnly:{type:"boolean",description:"Only use sitemap.xml for discovery, ignore HTML links"},includeSubdomains:{type:"boolean",description:"Include URLs from subdomains in results"},limit:{type:"number",description:"Maximum number of URLs to return"}},required:["url"]}},Z={name:"one_scrape",description:"Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.",inputSchema:{type:"object",properties:{url:{type:"string",description:"The URL to scrape"},formats:{type:"array",items:{type:"string",enum:["markdown","html","rawHtml","screenshot","links","screenshot@fullPage","extract"]},description:"Content formats to extract (default: ['markdown'])"},onlyMainContent:{type:"boolean",description:"Extract only the main content, filtering out navigation, footers, etc."},includeTags:{type:"array",items:{type:"string"},description:"HTML tags to specifically include in extraction"},excludeTags:{type:"array",items:{type:"string"},description:"HTML tags to exclude from extraction"},waitFor:{type:"number",description:"Time in milliseconds to wait for dynamic content to load"},timeout:{type:"number",description:"Maximum time in milliseconds to wait for the page to load"},actions:{type:"array",items:{type:"object",properties:{type:{type:"string",enum:["wait","click","screenshot","write","press","scroll","scrape","executeJavascript"],description:"Type of action to perform"},selector:{type:"string",description:"CSS selector for the target element"},milliseconds:{type:"number",description:"Time to wait in 
milliseconds (for wait action)"},text:{type:"string",description:"Text to write (for write action)"},key:{type:"string",description:"Key to press (for press action)"},direction:{type:"string",enum:["up","down"],description:"Scroll direction"},script:{type:"string",description:"JavaScript code to execute"},fullPage:{type:"boolean",description:"Take full page screenshot"}},required:["type"]},description:"List of actions to perform before scraping"},extract:{type:"object",properties:{schema:{type:"object",description:"Schema for structured data extraction"},systemPrompt:{type:"string",description:"System prompt for LLM extraction"},prompt:{type:"string",description:"User prompt for LLM extraction"}},description:"Configuration for structured data extraction"},mobile:{type:"boolean",description:"Use mobile viewport"},skipTlsVerification:{type:"boolean",description:"Skip TLS certificate verification"},removeBase64Images:{type:"boolean",description:"Remove base64 encoded images from output"},location:{type:"object",properties:{country:{type:"string",description:"Country code for geolocation"},languages:{type:"array",items:{type:"string"},description:"Language codes for content"}},description:"Location settings for scraping"}},required:["url"]}},ee={name:"one_extract",description:"Extract structured information from web pages using LLM. 
Supports both cloud AI and self-hosted LLM extraction.",inputSchema:{type:"object",properties:{urls:{type:"array",items:{type:"string"},description:"List of URLs to extract information from"},prompt:{type:"string",description:"Prompt for the LLM extraction"},systemPrompt:{type:"string",description:"System prompt for LLM extraction"},schema:{type:"object",description:"JSON schema for structured data extraction"},allowExternalLinks:{type:"boolean",description:"Allow extraction from external links"},enableWebSearch:{type:"boolean",description:"Enable web search for additional context"},includeSubdomains:{type:"boolean",description:"Include subdomains in extraction"}},required:["urls"]}};import Ne from"@mendable/firecrawl-js";import Ae from"@dotenvx/dotenvx";import{SafeSearchType as U}from"duck-duck-scrape";Ae.config();var xe=process.env.SEARCH_API_URL,v=process.env.SEARCH_API_KEY,te=process.env.SEARCH_PROVIDER??"local",Le=process.env.SAFE_SEARCH??0,Pe=process.env.LIMIT??10,Ce=process.env.CATEGORIES??"general",Re=process.env.ENGINES??"all",Oe=process.env.FORMAT??"json",Ie=process.env.LANGUAGE??"auto",De=process.env.TIME_RANGE??"",Be=process.env.TIMEOUT??1e4,ke=process.env.FIRECRAWL_API_KEY,re=process.env.FIRECRAWL_API_URL,ie=new Ne({apiKey:ke??"",...re?{apiUrl:re}:{}}),m=new Se({name:"one-search-mcp",version:"0.0.1"},{capabilities:{tools:{},logging:{}}}),b={limit:Number(Pe),categories:Ce,format:Oe,safesearch:Le,language:Ie,engines:Re,time_range:De,timeout:Be};m.setRequestHandler(ve,async()=>({tools:[J,ee,Z,Q]}));m.setRequestHandler(Ee,async i=>{let t=Date.now();try{let{name:e,arguments:r}=i.params;if(!r)throw new Error("No arguments provided");switch(m.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Received request for tool: [${e}]`}),e){case"one_search":{if(!Fe(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let{results:n,success:a}=await Ue({...r,apiKey:v??"",apiUrl:xe});if(!a)throw new Error("Failed to 
search");return{content:[{type:"text",text:n.map(s=>`Title: ${s.title}
 4 | URL: ${s.url}
 5 | Description: ${s.snippet}
 6 | ${s.markdown?`Content: ${s.markdown}`:""}`).join(`
 7 | 
 8 | `)}],results:n,success:a}}catch(n){return m.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error searching: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:"Unknown error"}]}}}case"one_scrape":{if(!Ge(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let n=Date.now();m.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Scraping started for url: [${r.url}]`});let{url:a,...o}=r,{content:s,success:u,result:h}=await Me(a,o);return m.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Scraping completed in ${Date.now()-n}ms`}),{content:s,result:h,success:u}}catch(n){return m.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error scraping: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:"Unknown error"}]}}}case"one_map":{if(!qe(r))throw new Error(`Invalid arguments for tool: [${e}]`);try{let{content:n,success:a,result:o}=await $e(r.url,r);return{content:n,result:o,success:a}}catch(n){return m.sendLoggingMessage({level:"error",data:`[${new Date().toISOString()}] Error mapping: ${n}`}),{success:!1,content:[{type:"text",text:n instanceof Error?n.message:String(n)}]}}}default:throw new Error(`Unknown tool: ${e}`)}}catch(e){let r=e instanceof Error?e.message:String(e);return m.sendLoggingMessage({level:"error",data:{message:`[${new Date().toISOString()}] Error processing request: ${r}`,tool:i.params.name,arguments:i.params.arguments,timestamp:new Date().toISOString(),duration:Date.now()-t}}),{success:!1,content:[{type:"text",text:r}]}}finally{m.sendLoggingMessage({level:"info",data:`[${new Date().toISOString()}] Request completed in ${Date.now()-t}ms`})}});async function Ue(i){switch(te){case"searxng":{let t={...b,...i,apiKey:v},{categories:e,language:r}=b;return e&&(t.categories=e),r&&(t.language=r),await G(t)}case"tavily":return await q({...b,...i,apiKey:v});case"bing":return await 
$({...b,...i,apiKey:v});case"duckduckgo":{let t=i.safeSearch??0,e=[U.STRICT,U.MODERATE,U.OFF];return await F({...b,...i,apiKey:v,safeSearch:e[t]})}case"local":return await z({...b,...i});default:throw new Error(`Unsupported search provider: ${te}`)}}async function Me(i,t){let e=await ie.scrapeUrl(i,{...t});if(!e.success)throw new Error(`Failed to scrape: ${e.error}`);let r=[];return e.markdown&&r.push(e.markdown),e.rawHtml&&r.push(e.rawHtml),e.links&&r.push(e.links.join(`
 9 | `)),e.screenshot&&r.push(e.screenshot),e.html&&r.push(e.html),e.extract&&r.push(e.extract),{content:[{type:"text",text:r.join(`
10 | 
11 | `)||"No content found"}],result:e,success:!0}}async function $e(i,t){let e=await ie.mapUrl(i,{...t});if("error"in e)throw new Error(`Failed to map: ${e.error}`);if(!e.links)throw new Error(`No links found from: ${i}`);return{content:[{type:"text",text:e.links.join(`
12 | `).trim()}],result:e.links,success:!0}}function Fe(i){return typeof i=="object"&&i!==null&&"query"in i&&typeof i.query=="string"}function Ge(i){return typeof i=="object"&&i!==null&&"url"in i&&typeof i.url=="string"}function qe(i){return typeof i=="object"&&i!==null&&"url"in i&&typeof i.url=="string"}async function He(){try{process.stdout.write(`Starting OneSearch MCP server...
13 | `);let i=new Te;await m.connect(i),m.sendLoggingMessage({level:"info",data:"OneSearch MCP server started"})}catch(i){let t=i instanceof Error?i.message:String(i);process.stderr.write(`Error starting server: ${t}
14 | `),process.exit(1)}}He().catch(i=>{let t=i instanceof Error?i.message:String(i);process.stderr.write(`Error running server: ${t}
15 | `),process.exit(1)});
16 | //# sourceMappingURL=index.js.map


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import eslint from '@eslint/js';
 2 | import tseslint from 'typescript-eslint';
 3 | 
 4 | export default tseslint.config(
 5 |   eslint.configs.recommended,
 6 |   ...tseslint.configs.recommended,
 7 |   {
 8 |     ignores: [
 9 |       'node_modules/**',
10 |       'dist/**',
11 |       'build/**',
12 |       'coverage/**',
13 |       '*.js',
14 |       '*.d.ts',
15 |     ],
16 |     languageOptions: {
17 |       ecmaVersion: 2020,
18 |       sourceType: 'module',
19 |       parser: tseslint.parser,
20 |     },
21 |     rules: {
22 |       'no-console': 'off',
23 |       'no-unused-vars': 'off',
24 |       '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
25 |       '@typescript-eslint/no-explicit-any': 'warn',
26 |       'quotes': ['error', 'single', { avoidEscape: true }],
27 |       'semi': ['error', 'always'],
28 |       'indent': ['error', 2, { SwitchCase: 1 }],
29 |       'comma-dangle': ['error', 'always-multiline'],
30 |     },
31 |   },
32 | ); 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "one-search-mcp",
  3 |   "version": "1.0.11",
  4 |   "description": "One Search MCP Server, Web Search & Crawl & Scraper & Extract, support Firecrawl, SearXNG, Tavily, DuckDuckGo, Bing, etc.",
  5 |   "private": false,
  6 |   "type": "module",
  7 |   "keywords": [
  8 |     "AI",
  9 |     "LLM",
 10 |     "MCP",
 11 |     "ModelContextProtocol",
 12 |     "Firecrawl MCP Server",
 13 |     "Search MCP Server",
 14 |     "SearXNG MCP Server",
 15 |     "DuckDuckGo MCP Server",
 16 |     "Bing MCP Server",
 17 |     "Tavily MCP Server",
 18 |     "Web Search",
 19 |     "LLM Tool",
 20 |     "One Search"
 21 |   ],
 22 |   "author": "zac.ma",
 23 |   "license": "MIT",
 24 |   "repository": {
 25 |     "type": "git",
 26 |     "url": "https://github.com/yokingma/one-search-mcp.git"
 27 |   },
 28 |   "main": "./dist/index.cjs",
 29 |   "module": "./dist/index.js",
 30 |   "types": "./dist/index.d.ts",
 31 |   "bin": {
 32 |     "one-search-mcp": "dist/index.js"
 33 |   },
 34 |   "files": [
 35 |     "dist/**"
 36 |   ],
 37 |   "publishConfig": {
 38 |     "access": "public"
 39 |   },
 40 |   "engines": {
 41 |     "node": ">=20.0.0"
 42 |   },
 43 |   "scripts": {
 44 |     "dev": "dotenvx run -- cross-env NODE_ENV=development tsx src/index.ts",
 45 |     "build": "tsup && node -e \"require('fs').chmodSync('dist/index.js', '755')\"",
 46 |     "start": "node dist/index.js",
 47 |     "lint": "eslint src",
 48 |     "lint:fix": "eslint src --fix"
 49 |   },
 50 |   "tsup": {
 51 |     "entry": [
 52 |       "src/index.ts"
 53 |     ],
 54 |     "outDir": "dist",
 55 |     "format": [
 56 |       "cjs",
 57 |       "esm"
 58 |     ],
 59 |     "splitting": false,
 60 |     "dts": true,
 61 |     "clean": true,
 62 |     "sourcemap": true,
 63 |     "minify": true
 64 |   },
 65 |   "exports": {
 66 |     ".": {
 67 |       "require": "./dist/index.cjs",
 68 |       "import": "./dist/index.js"
 69 |     }
 70 |   },
 71 |   "devDependencies": {
 72 |     "@eslint/js": "^8.56.0",
 73 |     "@types/async-retry": "^1.4.9",
 74 |     "@types/node": "^22.13.10",
 75 |     "@types/turndown": "^5.0.5",
 76 |     "@types/user-agents": "^1.0.4",
 77 |     "@typescript-eslint/eslint-plugin": "^7.0.0",
 78 |     "@typescript-eslint/parser": "^7.0.0",
 79 |     "cross-env": "^7.0.3",
 80 |     "eslint": "^8.56.0",
 81 |     "tsup": "^8.4.0",
 82 |     "tsx": "^4.19.3",
 83 |     "typescript": "^5.3.3",
 84 |     "typescript-eslint": "^7.0.0"
 85 |   },
 86 |   "dependencies": {
 87 |     "@agent-infra/logger": "^0.0.2-beta.0",
 88 |     "@dotenvx/dotenvx": "^1.38.5",
 89 |     "@mendable/firecrawl-js": "^1.20.1",
 90 |     "@modelcontextprotocol/sdk": "^1.7.0",
 91 |     "@tavily/core": "^0.3.1",
 92 |     "async-retry": "^1.3.3",
 93 |     "duck-duck-scrape": "^2.2.7",
 94 |     "puppeteer-core": "^24.4.0",
 95 |     "turndown": "^7.2.0",
 96 |     "turndown-plugin-gfm": "^1.0.2",
 97 |     "user-agents": "^1.1.495"
 98 |   }
 99 | }
100 | 


--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required: []
 9 |     properties:
      # NOTE(review): src/index.ts falls back to the 'local' provider when
      # SEARCH_PROVIDER is unset, whereas this schema defaults to 'searxng' and
      # the commandFunction below always sets SEARCH_PROVIDER. 'local' is also
      # not listed among the options here; confirm that is intentional.
      searchProvider:
        type: string
        default: searxng
        description: "Search provider to use. Options: searxng, duckduckgo, bing, tavily."
14 |       searchApiUrl:
15 |         type: string
16 |         description: API URL for the search provider (required for searxng).
17 |       searchApiKey:
18 |         type: string
19 |         description: API Key for the search provider (required for tavily or bing).
20 |       firecrawlApiUrl:
21 |         type: string
22 |         description: API URL for firecrawl.
23 |       firecrawlApiKey:
24 |         type: string
25 |         description: API Key for firecrawl if required.
26 |   commandFunction:
27 |     # A JS function that produces the CLI command based on the given config to start the MCP on stdio.
28 |     |-
29 |     (config) => ({
30 |       command: 'node',
31 |       args: ['dist/index.js'],
32 |       env: {
33 |         SEARCH_PROVIDER: config.searchProvider || 'searxng',
34 |         SEARCH_API_URL: config.searchApiUrl || '',
35 |         SEARCH_API_KEY: config.searchApiKey || '',
36 |         FIRECRAWL_API_URL: config.firecrawlApiUrl || '',
37 |         FIRECRAWL_API_KEY: config.firecrawlApiKey || ''
38 |       }
39 |     })
40 |   exampleConfig:
41 |     searchProvider: searxng
42 |     searchApiUrl: http://127.0.0.1:8080
43 |     searchApiKey: YOUR_API_KEY
44 |     firecrawlApiUrl: http://127.0.0.1:3002
45 |     firecrawlApiKey: YOUR_API_KEY
46 | 


--------------------------------------------------------------------------------
/src/global.d.ts:
--------------------------------------------------------------------------------
// Ambient declaration so TypeScript accepts imports of 'turndown-plugin-gfm'.
// Only `gfm` is declared, and it is deliberately typed loosely (returns `any`).
// NOTE(review): presumably the package ships no type definitions of its own; confirm.
declare module 'turndown-plugin-gfm' {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  export function gfm(): any;
}


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | 
  3 | import { Server } from '@modelcontextprotocol/sdk/server/index.js';
  4 | import  { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
  5 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
  6 | import { ISearchRequestOptions, ISearchResponse, SearchProvider } from './interface.js';
  7 | import { bingSearch, duckDuckGoSearch, searxngSearch, tavilySearch, localSearch } from './search/index.js';
  8 | import { SEARCH_TOOL, EXTRACT_TOOL, SCRAPE_TOOL, MAP_TOOL } from './tools.js';
  9 | import FirecrawlApp, { MapParams, ScrapeParams } from '@mendable/firecrawl-js';
 10 | import dotenvx from '@dotenvx/dotenvx';
 11 | import { SafeSearchType } from 'duck-duck-scrape';
 12 | 
 13 | dotenvx.config();
 14 | 
 15 | // search api
 16 | const SEARCH_API_URL = process.env.SEARCH_API_URL;
 17 | const SEARCH_API_KEY = process.env.SEARCH_API_KEY;
 18 | const SEARCH_PROVIDER: SearchProvider = process.env.SEARCH_PROVIDER as SearchProvider ?? 'local';
 19 | 
 20 | // search query params
 21 | const SAFE_SEARCH = process.env.SAFE_SEARCH ?? 0;
 22 | const LIMIT = process.env.LIMIT ?? 10;
 23 | const CATEGORIES = process.env.CATEGORIES ?? 'general';
 24 | const ENGINES = process.env.ENGINES ?? 'all';
 25 | const FORMAT = process.env.FORMAT ?? 'json';
 26 | const LANGUAGE = process.env.LANGUAGE ?? 'auto';
 27 | const TIME_RANGE = process.env.TIME_RANGE ?? '';
 28 | const DEFAULT_TIMEOUT = process.env.TIMEOUT ?? 10000;
 29 | 
 30 | // firecrawl api
 31 | const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
 32 | const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
 33 | 
 34 | // firecrawl client
 35 | const firecrawl = new FirecrawlApp({
 36 |   apiKey: FIRECRAWL_API_KEY ?? '',
 37 |   ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
 38 | });
 39 | 
 40 | // Server implementation
 41 | const server = new Server(
 42 |   {
 43 |     name: 'one-search-mcp',
 44 |     version: '0.0.1',
 45 |   },
 46 |   {
 47 |     capabilities: {
 48 |       tools: {},
 49 |       logging: {},
 50 |     },
 51 |   },
 52 | );
 53 | 
 54 | const searchDefaultConfig = {
 55 |   limit: Number(LIMIT),
 56 |   categories: CATEGORIES,
 57 |   format: FORMAT,
 58 |   safesearch: SAFE_SEARCH,
 59 |   language: LANGUAGE,
 60 |   engines: ENGINES,
 61 |   time_range: TIME_RANGE,
 62 |   timeout: DEFAULT_TIMEOUT,
 63 | };
 64 | 
 65 | // Tool handlers
 66 | server.setRequestHandler(ListToolsRequestSchema, async () => ({
 67 |   tools: [
 68 |     SEARCH_TOOL,
 69 |     EXTRACT_TOOL,
 70 |     SCRAPE_TOOL,
 71 |     MAP_TOOL,
 72 |   ],
 73 | }));
 74 | 
 75 | server.setRequestHandler(CallToolRequestSchema, async (request) => {
 76 |   const startTime = Date.now();
 77 | 
 78 |   try {
 79 |     const { name, arguments: args } = request.params;
 80 | 
 81 |     if (!args) {
 82 |       throw new Error('No arguments provided');
 83 |     }
 84 |   
 85 |     server.sendLoggingMessage({
 86 |       level: 'info',
 87 |       data: `[${new Date().toISOString()}] Received request for tool: [${name}]`,
 88 |     });
 89 |   
 90 |     switch (name) {
 91 |       case 'one_search': {
 92 |         // check args.
 93 |         if (!checkSearchArgs(args)) {
 94 |           throw new Error(`Invalid arguments for tool: [${name}]`);
 95 |         }
 96 |         try {
 97 |           const { results, success } = await processSearch({
 98 |             ...args,
 99 |             apiKey: SEARCH_API_KEY ?? '',
100 |             apiUrl: SEARCH_API_URL,
101 |           });
102 |           if (!success) {
103 |             throw new Error('Failed to search');
104 |           }
105 |           const resultsText = results.map((result) => (
106 |             `Title: ${result.title}
107 | URL: ${result.url}
108 | Description: ${result.snippet}
109 | ${result.markdown ? `Content: ${result.markdown}` : ''}`
110 |           ));
111 |           return {
112 |             content: [
113 |               {
114 |                 type: 'text',
115 |                 text: resultsText.join('\n\n'),
116 |               },
117 |             ],
118 |             results,
119 |             success,
120 |           };
121 |         } catch (error) {
122 |           server.sendLoggingMessage({
123 |             level: 'error',
124 |             data: `[${new Date().toISOString()}] Error searching: ${error}`,
125 |           });
126 |           const msg = error instanceof Error ? error.message : 'Unknown error';
127 |           return {
128 |             success: false,
129 |             content: [
130 |               {
131 |                 type: 'text',
132 |                 text: msg,
133 |               },
134 |             ],
135 |           };
136 |         }
137 |       }
138 |       case 'one_scrape': {
139 |         if (!checkScrapeArgs(args)) {
140 |           throw new Error(`Invalid arguments for tool: [${name}]`);
141 |         }
142 |         try {
143 |           const startTime = Date.now();
144 |           server.sendLoggingMessage({
145 |             level: 'info',
146 |             data: `[${new Date().toISOString()}] Scraping started for url: [${args.url}]`,
147 |           });
148 | 
149 |           const { url, ...scrapeArgs } = args;
150 |           const { content, success, result } = await processScrape(url, scrapeArgs);
151 | 
152 |           server.sendLoggingMessage({
153 |             level: 'info',
154 |             data: `[${new Date().toISOString()}] Scraping completed in ${Date.now() - startTime}ms`,
155 |           });
156 | 
157 |           return {
158 |             content,
159 |             result,
160 |             success,
161 |           };
162 |         } catch (error) {
163 |           server.sendLoggingMessage({
164 |             level: 'error',
165 |             data: `[${new Date().toISOString()}] Error scraping: ${error}`,
166 |           });
167 |           const msg = error instanceof Error ? error.message : 'Unknown error';
168 |           return {
169 |             success: false,
170 |             content: [
171 |               {
172 |                 type: 'text',
173 |                 text: msg,
174 |               },
175 |             ],
176 |           };
177 |         }
178 |       }
179 |       case 'one_map': {
180 |         if (!checkMapArgs(args)) {
181 |           throw new Error(`Invalid arguments for tool: [${name}]`);
182 |         }
183 |         try {
184 |           const { content, success, result } = await processMapUrl(args.url, args);
185 |           return {
186 |             content,
187 |             result,
188 |             success,
189 |           };
190 |         } catch (error) {
191 |           server.sendLoggingMessage({
192 |             level: 'error',
193 |             data: `[${new Date().toISOString()}] Error mapping: ${error}`,
194 |           });
195 |           const msg = error instanceof Error ? error.message : String(error);
196 |           return {
197 |             success: false,
198 |             content: [
199 |               {
200 |                 type: 'text',
201 |                 text: msg,
202 |               },
203 |             ],
204 |           };
205 |         }
206 |       }
207 |       default: {
208 |         throw new Error(`Unknown tool: ${name}`);
209 |       }
210 |     }
211 |   } catch(error) {
212 |     const msg = error instanceof Error ? error.message : String(error);
213 |     server.sendLoggingMessage({
214 |       level: 'error',
215 |       data: {
216 |         message: `[${new Date().toISOString()}] Error processing request: ${msg}`,
217 |         tool: request.params.name,
218 |         arguments: request.params.arguments,
219 |         timestamp: new Date().toISOString(),
220 |         duration: Date.now() - startTime,
221 |       },
222 |     });
223 |     return {
224 |       success: false,
225 |       content: [
226 |         {
227 |           type: 'text',
228 |           text: msg,
229 |         },
230 |       ],
231 |     };
232 |   } finally {
233 |     server.sendLoggingMessage({
234 |       level: 'info',
235 |       data: `[${new Date().toISOString()}] Request completed in ${Date.now() - startTime}ms`,
236 |     });
237 |   }
238 | });
239 | 
/**
 * Dispatches a search request to the provider selected by SEARCH_PROVIDER.
 *
 * Request arguments are layered over `searchDefaultConfig`; the API key always
 * comes from the environment for providers that take one.
 *
 * @param args Search options taken from the tool call.
 * @returns The provider's search response.
 * @throws Error when SEARCH_PROVIDER names an unsupported provider; provider
 *   errors propagate unchanged.
 */
async function processSearch(args: ISearchRequestOptions): Promise<ISearchResponse> {
  switch (SEARCH_PROVIDER) {
    case 'searxng': {
      const params = {
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY,
      };

      // ENV > args for categories and language, but only when the variable is
      // actually set. The built-in fallbacks ('general' / 'auto') are never
      // empty, so comparing against the merged defaults would discard the
      // caller's values on every request.
      const envCategories = process.env.CATEGORIES;
      const envLanguage = process.env.LANGUAGE;

      if (envCategories) {
        params.categories = envCategories;
      }
      if (envLanguage) {
        params.language = envLanguage;
      }
      return await searxngSearch(params);
    }
    case 'tavily': {
      return await tavilySearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY,
      });
    }
    case 'bing': {
      return await bingSearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY,
      });
    }
    case 'duckduckgo': {
      // NOTE(review): ISearchRequestOptions documents safeSearch as
      // 0 = off … 2 = strict, yet this table maps 0 to STRICT and 2 to OFF.
      // The existing mapping is kept; confirm which convention is intended.
      const safeSearchOptions = [SafeSearchType.STRICT, SafeSearchType.MODERATE, SafeSearchType.OFF];
      const safeSearch = args.safeSearch ?? 0;
      return await duckDuckGoSearch({
        ...searchDefaultConfig,
        ...args,
        apiKey: SEARCH_API_KEY,
        // Out-of-range values fall back to the default level instead of undefined.
        safeSearch: safeSearchOptions[safeSearch] ?? safeSearchOptions[0],
      });
    }
    case 'local': {
      return await localSearch({
        ...searchDefaultConfig,
        ...args,
      });
    }
    default:
      throw new Error(`Unsupported search provider: ${SEARCH_PROVIDER}`);
  }
}
295 | 
/**
 * Scrapes a single URL through Firecrawl and flattens the returned formats
 * into one text payload.
 *
 * @param url  Page to scrape.
 * @param args Firecrawl scrape options (formats, actions, ...).
 * @returns An object with the text `content` blocks, the raw Firecrawl
 *   response as `result`, and `success: true`.
 * @throws Error when Firecrawl reports the scrape as unsuccessful.
 */
async function processScrape(url: string, args: ScrapeParams) {
  const res = await firecrawl.scrapeUrl(url, {
    ...args,
  });

  if (!res.success) {
    throw new Error(`Failed to scrape: ${res.error}`);
  }

  const content: string[] = [];

  if (res.markdown) {
    content.push(res.markdown);
  }

  if (res.rawHtml) {
    content.push(res.rawHtml);
  }

  if (res.links) {
    content.push(res.links.join('\n'));
  }

  if (res.screenshot) {
    content.push(res.screenshot);
  }

  if (res.html) {
    content.push(res.html);
  }

  if (res.extract) {
    // Extraction output is structured data; joining it as-is would render
    // "[object Object]", so serialize anything that is not already a string.
    content.push(
      typeof res.extract === 'string'
        ? res.extract
        : JSON.stringify(res.extract, null, 2),
    );
  }

  return {
    content: [
      {
        type: 'text',
        text: content.join('\n\n') || 'No content found',
      },
    ],
    result: res,
    success: true,
  };
}
342 | 
/**
 * Discovers URLs reachable from `url` via Firecrawl's map endpoint.
 *
 * @param url  Starting point for discovery.
 * @param args Firecrawl map options.
 * @returns The links joined by newlines as text content, the link list as
 *   `result`, and `success: true`.
 * @throws Error when the response carries an `error` field or has no links.
 */
async function processMapUrl(url: string, args: MapParams) {
  const mapped = await firecrawl.mapUrl(url, { ...args });

  if ('error' in mapped) {
    throw new Error(`Failed to map: ${mapped.error}`);
  }

  const { links } = mapped;
  if (!links) {
    throw new Error(`No links found from: ${url}`);
  }

  const text = links.join('\n').trim();
  return {
    content: [{ type: 'text', text }],
    result: links,
    success: true,
  };
}
367 | 
/**
 * Type guard for `one_search` arguments: a non-null object whose `query`
 * property is a string.
 */
function checkSearchArgs(args: unknown): args is ISearchRequestOptions {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  return 'query' in args && typeof args.query === 'string';
}
376 | 
/**
 * Type guard for `one_scrape` arguments: a non-null object whose `url`
 * property is a string.
 */
function checkScrapeArgs(args: unknown): args is ScrapeParams & { url: string } {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  return 'url' in args && typeof args.url === 'string';
}
385 | 
/**
 * Type guard for `one_map` arguments: a non-null object whose `url`
 * property is a string.
 */
function checkMapArgs(args: unknown): args is MapParams & { url: string } {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  return 'url' in args && typeof args.url === 'string';
}
394 | 
/**
 * Connects the MCP server to a stdio transport and announces start-up.
 *
 * Any failure while connecting is reported on stderr and terminates the
 * process with exit code 1.
 */
async function runServer() {
  try {
    // With a stdio transport, stdout carries the protocol messages. The
    // human-readable banner therefore goes to stderr so it cannot corrupt
    // the stream the client is parsing.
    process.stderr.write('Starting OneSearch MCP server...\n');

    const transport = new StdioServerTransport();
    await server.connect(transport);

    // Logging notifications can only be sent once the transport is connected.
    server.sendLoggingMessage({
      level: 'info',
      data: 'OneSearch MCP server started',
    });
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    process.stderr.write(`Error starting server: ${msg}\n`);
    process.exit(1);
  }
}
413 | 
// Launch the server; a rejection is reported on stderr and ends the process with exit code 1.
runServer().then(undefined, (error: unknown) => {
  const reason = error instanceof Error ? error.message : String(error);
  process.stderr.write(`Error running server: ${reason}\n`);
  process.exit(1);
});
420 | 
421 | // export types
422 | export * from './interface.js';
423 | 


--------------------------------------------------------------------------------
/src/interface.ts:
--------------------------------------------------------------------------------
 1 | import type AsyncRetry from 'async-retry';
 2 | 
 3 | export interface IMediaItem {
 4 |   thumbnail?: string;
 5 |   src?: string;
 6 | }
 7 | 
 8 | export interface ISearchRequestOptions {
 9 |   query: string;
10 |   page?: number;
11 |   limit?: number;
12 |   categories?: string;
13 |   format?: string;
14 |   language?: string;
15 |   // search engines: bing,google,baidu
16 |   engines?: string;
17 |   // 0: off, 1: moderate, 2: strict
18 |   safeSearch?: 0 | 1 | 2;
19 |   timeRange?: string;
20 |   timeout?: number | string;
21 |   apiKey?: string;
22 |   apiUrl?: string;
23 |   retry?: AsyncRetry.Options;
24 | }
25 | 
/**
 * One entry of a search response.
 * `title`, `snippet` and `url` are always present; the rest is optional.
 */
export interface ISearchResponseResult {
  // Required core fields
  url: string;
  title: string;
  snippet: string;

  // Optional extras
  markdown?: string;
  thumbnailUrl?: string;
  /** Name of the engine — presumably the one that produced this entry; confirm against providers. */
  engine?: string;
  source?: string;
  /** Attached image; may be explicitly `null`. */
  image?: null | IMediaItem;
  /** Attached video; may be explicitly `null`. */
  video?: null | IMediaItem;
}
37 | 
/**
 * Envelope of a search call: a success flag plus the list of result entries.
 */
export interface ISearchResponse {
  success: boolean;
  results: Array<ISearchResponseResult>;
}
42 | 
/** Identifiers of the available search providers. */
export type SearchProvider =
  | 'searxng'
  | 'duckduckgo'
  | 'bing'
  | 'tavily'
  | 'local';

/** Named time windows, from widest to narrowest. */
export type SearchTimeRange =
  | 'year'
  | 'month'
  | 'week'
  | 'day';
45 | 


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/baidu.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 3 |  * SPDX-License-Identifier: Apache-2.0
 4 |  */
 5 | import { Page } from '../../browser/index.js';
 6 | import type { SearchEngineAdapter, SearchResult } from '../types.js';
 7 | 
 8 | /**
 9 |  * Baidu search engine adapter implementation.
10 |  * Provides functionality to generate Baidu search URLs and extract search results from Baidu search pages.
11 |  */
12 | export class BaiduSearchEngine implements SearchEngineAdapter {
13 |   /**
14 |    * Generates a Baidu search URL based on the provided query and options.
15 |    *
16 |    * @param query - The search query string
17 |    * @param options - Search configuration options
18 |    * @param options.count - Number of search results to request (default: 10)
19 |    * @param options.excludeDomains - Array of domain names to exclude from search results
20 |    * @returns Formatted Baidu search URL as a string
21 |    */
22 |   getSearchUrl(
23 |     query: string,
24 |     options: {
25 |       count?: number;
26 |       excludeDomains?: string[];
27 |     },
28 |   ): string {
29 |     // Baidu doesn't support excluding domains in the same way as Google
30 |     // But we can add '-site:domain' to the query
31 |     const excludeDomainsQuery =
32 |       options.excludeDomains && options.excludeDomains.length > 0
33 |         ? options.excludeDomains.map((domain) => `-site:${domain}`).join(' ')
34 |         : '';
35 | 
36 |     const searchParams = new URLSearchParams({
37 |       wd: excludeDomainsQuery ? `${excludeDomainsQuery} ${query}` : query,
38 |       rn: `${options.count || 10}`, // rn is the parameter for result count
39 |     });
40 | 
41 |     return `https://www.baidu.com/s?${searchParams.toString()}`;
42 |   }
43 | 
44 |   /**
45 |    * Extracts search results from a Baidu search page.
46 |    *
47 |    * @param window - The browser window object containing the loaded Baidu search page
48 |    * @returns Array of search results extracted from the page
49 |    */
50 |   extractSearchResults(window: Window): SearchResult[] {
51 |     const links: SearchResult[] = [];
52 |     const document = window.document;
53 | 
54 |     try {
55 |       // Baidu search results are in elements with class 'result'
56 |       const elements = document.querySelectorAll('.result');
57 |       elements.forEach((element) => {
58 |         const titleEl = element.querySelector('.t a');
59 |         const url = titleEl?.getAttribute('href');
60 |         const snippetEl = element.querySelector('.c-span-last .content-right_2s-H4');
61 | 
62 |         if (!url) return;
63 | 
64 |         const item: SearchResult = {
65 |           title: titleEl?.textContent || '',
66 |           url, // Note: Baidu uses redirects, we'll need to follow them
67 |           snippet: snippetEl?.textContent || '',
68 |           content: '',
69 |         };
70 | 
71 |         if (!item.title || !item.url) return;
72 | 
73 |         links.push(item);
74 |       });
75 |     } catch (error) {
76 |       console.error('Error extracting search results from Baidu:', error);
77 |     }
78 | 
79 |     return links;
80 |   }
81 | 
82 |   /**
83 |   * Waits for Bing search results to load completely.
84 |   *
85 |   * @param page - The Puppeteer page object
86 |   * @returns Promise that resolves when search results are loaded
87 |   */
88 |   async waitForSearchResults(page: Page, timeout?: number): Promise<void> {
89 |     await page.waitForSelector('#page', {
90 |       timeout: timeout ?? 10000,
91 |     });
92 |   }
93 | }


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/bing.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  3 |  * SPDX-License-Identifier: Apache-2.0
  4 |  */
  5 | import type { Page } from 'puppeteer-core';
  6 | import type { SearchEngineAdapter, SearchResult } from '../types.js';
  7 | 
  8 | /**
  9 |  * Bing search engine adapter implementation.
 10 |  * Provides functionality to generate Bing search URLs and extract search results from Bing search pages.
 11 |  */
 12 | export class BingSearchEngine implements SearchEngineAdapter {
 13 |   /**
 14 |    * Generates a Bing search URL based on the provided query and options.
 15 |    *
 16 |    * @param query - The search query string
 17 |    * @param options - Search configuration options
 18 |    * @param options.count - Number of search results to request (default: 10)
 19 |    * @param options.excludeDomains - Array of domain names to exclude from search results
 20 |    * @returns Formatted Bing search URL as a string
 21 |    */
 22 |   getSearchUrl(
 23 |     query: string,
 24 |     options: {
 25 |       count?: number;
 26 |       excludeDomains?: string[];
 27 |     },
 28 |   ): string {
 29 |     const searchParams = new URLSearchParams({
 30 |       q: `${
 31 |         options.excludeDomains && options.excludeDomains.length > 0
 32 |           ? `${options.excludeDomains.map((domain) => `-site:${domain}`).join(' ')} `
 33 |           : ''
 34 |       }${query}`,
 35 |       count: `${options.count || 10}`,
 36 |     });
 37 | 
 38 |     return `https://www.bing.com/search?${searchParams.toString()}`;
 39 |   }
 40 | 
 41 |   /**
 42 |    * Extracts search results from a Bing search page.
 43 |    *
 44 |    * @param window - The browser window object containing the loaded Bing search page
 45 |    * @returns Array of search results extracted from the page
 46 |    */
 47 |   extractSearchResults(window: Window): SearchResult[] {
 48 |     const links: SearchResult[] = [];
 49 |     const document = window.document;
 50 | 
 51 |     /**
 52 |      * Validates if a string is a properly formatted URL.
 53 |      *
 54 |      * @param url - The URL string to validate
 55 |      * @returns Boolean indicating if the URL is valid
 56 |      */
 57 |     const isValidUrl = (url: string) => {
 58 |       try {
 59 |         new URL(url);
 60 |         return true;
 61 |       } catch (error) {
 62 |         return false;
 63 |       }
 64 |     };
 65 | 
 66 |     /**
 67 |     * Extracts the snippet text from a search result element
 68 |     * @param element - The search result element
 69 |     * @returns The extracted snippet text
 70 |     */
 71 |     const extractSnippet = (element: Element): string => {
 72 |     // Clone the element to avoid modifying the original DOM
 73 |       const clone = element.cloneNode(true) as Element;
 74 |  
 75 |       // Remove title elements (typically h2 tags in Bing)
 76 |       const titleElements = clone.querySelectorAll('h2');
 77 |       titleElements.forEach((el) => el.remove());
 78 | 
 79 |       // Remove any cite/URL elements
 80 |       const citeElements = clone.querySelectorAll('.b_attribution');
 81 |       citeElements.forEach((el) => el.remove());
 82 | 
 83 |       // Remove script and style elements
 84 |       const scriptElements = clone.querySelectorAll('script, style');
 85 |       scriptElements.forEach((el) => el.remove());
 86 | 
 87 |       // Get text content and remove duplicates
 88 |       const text = Array.from(clone.querySelectorAll('*'))
 89 |         .filter((node) => node.textContent?.trim())
 90 |         .map((node) => node.textContent?.trim())
 91 |         .filter(Boolean)
 92 |         .reduce((acc: string[], curr) => {
 93 |           // Only add text if it's not already included in accumulated text
 94 |           if (
 95 |             !acc.some(
 96 |               (text) =>
 97 |                 text.includes(curr as string) ||
 98 |             (curr as string).includes(text),
 99 |             )
100 |           ) {
101 |             acc.push(curr as string);
102 |           }
103 |           return acc;
104 |         }, [])
105 |         .join(' ')
106 |         .trim()
107 |         .replace(/\s+/g, ' ');
108 | 
109 |       return text;
110 |     };
111 | 
112 |     try {
113 |       // Bing search results are in elements with class 'b_algo'
114 |       const elements = document.querySelectorAll('.b_algo');
115 |       elements.forEach((element) => {
116 |         const titleEl = element.querySelector('h2');
117 |         const urlEl = element.querySelector('h2 a');
118 |         const url = urlEl?.getAttribute('href');
119 |         const snippet = extractSnippet(element);
120 | 
121 |         if (!url || !isValidUrl(url)) return;
122 | 
123 |         const item: SearchResult = {
124 |           title: titleEl?.textContent || '',
125 |           snippet,
126 |           url,
127 |           content: '',
128 |         };
129 | 
130 |         if (!item.title || !item.url) return;
131 | 
132 |         links.push(item);
133 |       });
134 |     } catch (error) {
135 |       console.error('Error extracting search results from Bing:', error);
136 |       throw error;
137 |     }
138 | 
139 |     return links;
140 |   }
141 | 
142 |   /**
143 |   * Waits for Bing search results to load completely.
144 |   *
145 |   * @param page - The Puppeteer page object
146 |   * @returns Promise that resolves when search results are loaded
147 |   */
148 |   async waitForSearchResults(page: Page, timeout?: number): Promise<void> {
149 |     await page.waitForSelector('#b_results', {
150 |       timeout: timeout ?? 10000,
151 |     });
152 |   }
153 | }


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/get.ts:
--------------------------------------------------------------------------------
 1 | import { BingSearchEngine } from './bing.js';
 2 | import { BaiduSearchEngine } from './baidu.js';
 3 | import type { LocalBrowserSearchEngine, SearchEngineAdapter } from '../types.js';
 4 | import { SogouSearchEngine } from './sogou.js';
 5 | import { GoogleSearchEngine } from './google.js';
 6 | 
 7 | /**
 8 |  * Factory function to get the appropriate search engine adapter instance.
 9 |  *
10 |  * @param engine - The search engine identifier ('sogou', 'bing', or 'baidu')
11 |  * @returns An instance of the requested search engine adapter
12 |  */
13 | export function getSearchEngine(engine: LocalBrowserSearchEngine): SearchEngineAdapter {
14 |   switch (engine) {
15 |     case 'bing':
16 |       return new BingSearchEngine();
17 |     case 'baidu':
18 |       return new BaiduSearchEngine();
19 |     case 'sogou':
20 |       return new SogouSearchEngine();
21 |     case 'google':
22 |       return new GoogleSearchEngine();
23 |     default:
24 |       return new BingSearchEngine();
25 |   }
26 | }


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/google.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  3 |  * SPDX-License-Identifier: Apache-2.0
  4 |  */
  5 | import type { Page } from '../../browser/types.js';
  6 | import type { SearchEngineAdapter, SearchResult } from '../types.js';
  7 | 
  8 | /**
  9 |  * Google search engine adapter implementation.
 10 |  * Provides functionality to generate Google search URLs and extract search results from Google search pages.
 11 |  */
 12 | export class GoogleSearchEngine implements SearchEngineAdapter {
 13 |   /**
 14 |    * Generates a Google search URL based on the provided query and options.
 15 |    *
 16 |    * @param query - The search query string
 17 |    * @param options - Search configuration options
 18 |    * @param options.count - Number of search results to request (default: 10)
 19 |    * @param options.excludeDomains - Array of domain names to exclude from search results
 20 |    * @returns Formatted Google search URL as a string
 21 |    */
 22 |   getSearchUrl(
 23 |     query: string,
 24 |     options: {
 25 |       count?: number;
 26 |       excludeDomains?: string[];
 27 |     },
 28 |   ): string {
 29 |     const searchParams = new URLSearchParams({
 30 |       q: `${
 31 |         options.excludeDomains && options.excludeDomains.length > 0
 32 |           ? `${options.excludeDomains.map((domain) => `-site:${domain}`).join(' ')} `
 33 |           : ''
 34 |       }${query}`,
 35 |       num: `${options.count || 10}`,
 36 |     });
 37 | 
 38 |     searchParams.set('udm', '14');
 39 |     return `https://www.google.com/search?${searchParams.toString()}`;
 40 |   }
 41 | 
 42 |   /**
 43 |    * Extracts search results from a Google search page.
 44 |    *
 45 |    * @param window - The browser window object containing the loaded Google search page
 46 |    * @returns Array of search results extracted from the page
 47 |    */
 48 |   extractSearchResults(window: Window): SearchResult[] {
 49 |     const links: SearchResult[] = [];
 50 |     const document = window.document;
 51 | 
 52 |     /**
 53 |      * Validates if a string is a properly formatted URL.
 54 |      *
 55 |      * @param url - The URL string to validate
 56 |      * @returns Boolean indicating if the URL is valid
 57 |      */
 58 |     const isValidUrl = (url: string) => {
 59 |       try {
 60 |         new URL(url);
 61 |         return true;
 62 |       } catch (error) {
 63 |         return false;
 64 |       }
 65 |     };
 66 | 
 67 |     /**
 68 |      * Extracts the snippet text from an element by cloning it and removing title elements
 69 |      *
 70 |      * @param element - The search result element
 71 |      * @returns The extracted snippet text
 72 |      */
 73 |     const extractSnippet = (element: Element): string => {
 74 |       // Clone the element to avoid modifying the original DOM
 75 |       const clone = element.cloneNode(true) as Element;
 76 | 
 77 |       // Remove title elements (typically h3 tags in Google)
 78 |       const titleElements = clone.querySelectorAll('h3');
 79 |       titleElements.forEach((el) => el.remove());
 80 | 
 81 |       // Remove any cite elements (showing the URL)
 82 |       const citeElements = clone.querySelectorAll('cite');
 83 |       citeElements.forEach((el) => el.remove());
 84 | 
 85 |       // Remove script and style elements
 86 |       const scriptElements = clone.querySelectorAll('script, style');
 87 |       scriptElements.forEach((el) => el.remove());
 88 | 
 89 |       // Get text content and remove duplicates
 90 |       const text = Array.from(clone.querySelectorAll('*'))
 91 |         .filter((node) => node.textContent?.trim())
 92 |         .map((node) => node.textContent?.trim())
 93 |         .filter(Boolean)
 94 |         .reduce((acc: string[], curr) => {
 95 |           // Only add text if it's not already included in accumulated text
 96 |           if (
 97 |             !acc.some(
 98 |               (text) =>
 99 |                 text.includes(curr as string) ||
100 |                 (curr as string).includes(text),
101 |             )
102 |           ) {
103 |             acc.push(curr as string);
104 |           }
105 |           return acc;
106 |         }, [])
107 |         .join(' ')
108 |         .trim()
109 |         .replace(/\s+/g, ' ');
110 | 
111 |       return text;
112 |     };
113 | 
114 |     try {
115 |       // Google search results are contained in elements with class 'tF2Cxc'
116 |       // It may change at any time
117 |       const elements = document.querySelectorAll('.tF2Cxc');
118 |       elements.forEach((element) => {
119 |         const titleEl = element.querySelector('h3');
120 |         const urlEl = element.querySelector('a');
121 |         const url = urlEl?.getAttribute('href');
122 | 
123 |         // Extract snippet using the generic method
124 |         const snippet = extractSnippet(element.parentElement || element);
125 | 
126 |         if (!url || !isValidUrl(url)) return;
127 | 
128 |         const item: SearchResult = {
129 |           title: titleEl?.textContent || '',
130 |           url,
131 |           snippet,
132 |           content: '',
133 |         };
134 | 
135 |         if (!item.title || !item.url) return;
136 | 
137 |         links.push(item);
138 |       });
139 |     } catch (error) {
140 |       console.error(error);
141 |     }
142 | 
143 |     return links;
144 |   }
145 | 
146 |   /**
147 |    * Waits for Google search results to load completely.
148 |    *
149 |    * @param page - The Puppeteer page object
150 |    * @returns Promise that resolves when search results are loaded
151 |    */
152 |   async waitForSearchResults(page: Page, timeout?: number): Promise<void> {
153 |     await page.waitForSelector('#search', {
154 |       timeout: timeout ?? 10000,
155 |     });
156 |   }
157 | }


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/index.ts:
--------------------------------------------------------------------------------
1 | export * from './bing.js';
2 | export * from './baidu.js';
3 | export * from './sogou.js';
4 | export { getSearchEngine } from './get.js';


--------------------------------------------------------------------------------
/src/libs/browser-search/engines/sogou.ts:
--------------------------------------------------------------------------------
  1 | import { Page } from '../../browser/index.js';
  2 | import type { SearchEngineAdapter, SearchResult } from '../types.js';
  3 | 
  4 | export class SogouSearchEngine implements SearchEngineAdapter {
  5 |   /**
  6 |    * Generates a Sogou search URL based on the provided query and options.
  7 |    *
  8 |    * @param query - The search query string
  9 |    * @param options - Search configuration options
 10 |    * @param options.count - Number of search results to request (default: 10)
 11 |    * @param options.excludeDomains - Array of domain names to exclude from search results
 12 |    * @returns Formatted Sogou search URL as a string
 13 |    */
 14 |   getSearchUrl(
 15 |     query: string,
 16 |     options: {
 17 |       count?: number;
 18 |       excludeDomains?: string[];
 19 |     },
 20 |   ): string {
 21 |     const { count = 10, excludeDomains = [] } = options;
 22 | 
 23 |     const excludeDomainsQuery =
 24 |       excludeDomains && excludeDomains.length > 0
 25 |         ? excludeDomains.map((domain) => `-site:${domain}`).join(' ')
 26 |         : '';
 27 | 
 28 |     const searchParams = new URLSearchParams({
 29 |       query: `${excludeDomainsQuery ? `${excludeDomainsQuery} ` : ''}${query}`,
 30 |       num: `${count}`,
 31 |     });
 32 | 
 33 |     return `https://www.sogou.com/web?${searchParams.toString()}`;
 34 |   }
 35 | 
 36 |   /**
 37 |    * !NOTE: This function runs in the context of the browser page, not Node.js
 38 |    * 
 39 |    * Extract search results from Sogou
 40 |    * @param window - The window object
 41 |    * @returns Search results
 42 |    */
 43 |   extractSearchResults(window: Window): SearchResult[] {
 44 |     const links: SearchResult[] = [];
 45 |     const document = window.document;
 46 | 
 47 |     const isValidUrl = (url: string) => {
 48 |       try {
 49 |         new URL(url);
 50 |         return true;
 51 |       } catch (error) {
 52 |         return false;
 53 |       }
 54 |     };
 55 | 
 56 |     const EndPoints = 'https://www.sogou.com';
 57 |     
 58 |     const SELECTOR = {
 59 |       results: '.results .vrwrap',
 60 |       resultTitle: '.vr-title',
 61 |       resultLink: '.vr-title > a',
 62 |       resultSnippet: ['.star-wiki', '.fz-mid', '.attribute-centent'],
 63 |       resultSnippetExcluded: ['.text-lightgray', '.zan-box', '.tag-website'],
 64 |       related: '#main .vrwrap.middle-better-hintBox .hint-mid',
 65 |     };
 66 | 
 67 |     try {
 68 |       const elements = document.querySelectorAll(SELECTOR.results);
 69 |       elements.forEach((element) => {
 70 |         const titleEl = element.querySelector(SELECTOR.resultTitle);
 71 |         let url = element.querySelector(SELECTOR.resultLink)?.getAttribute('href');
 72 | 
 73 |         const snippets = SELECTOR.resultSnippet.map((selector) => {
 74 |           const cloneElement = element.cloneNode(true) as HTMLElement;
 75 |           // remove excluded elements
 76 |           SELECTOR.resultSnippetExcluded.forEach((excludedSelector) => {
 77 |             const el = cloneElement.querySelector(excludedSelector);
 78 |             el?.remove();
 79 |           });
 80 |           // get the text content of the element
 81 |           const el = cloneElement.querySelector(selector);
 82 |           return el?.textContent?.trim() || '';
 83 |         });
 84 | 
 85 |         const snippet = snippets
 86 |           .filter(Boolean)
 87 |           .join(' ')
 88 |           .replace(/\s+/g, ' ')
 89 |           .trim();
 90 | 
 91 |         if (!url?.includes('http')) url = `${EndPoints}${url}`;
 92 | 
 93 |         if (!url?.trim() || !isValidUrl(url)) return;
 94 | 
 95 |         const item: SearchResult = {
 96 |           title: titleEl?.textContent?.trim() || '',
 97 |           url,
 98 |           snippet,
 99 |           content: '',
100 |         };
101 | 
102 |         if (!item.title || !item.url) return;
103 | 
104 |         links.push(item);
105 |       });
106 |     } catch (error) {
107 |       const msg = error instanceof Error ? error.message : String(error);
108 |       console.error('Error extracting search results from Sogou:', msg);
109 |       throw error;
110 |     }
111 | 
112 |     return links;
113 |   }
114 | 
115 |   /**
116 |   * Waits for Sogou search results to load completely.
117 |   *
118 |   * @param page - The Puppeteer page object
119 |   * @returns Promise that resolves when search results are loaded
120 |   */
121 |   async waitForSearchResults(page: Page, timeout?: number): Promise<void> {
122 |     await page.waitForSelector('#pagebar_container', {
123 |       timeout: timeout ?? 10000,
124 |     });
125 |   }
126 | }
127 | 


--------------------------------------------------------------------------------
/src/libs/browser-search/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * A tiny stealth-mode web search and content extraction library built on top of Puppeteer
3 |  * The following code is based on
4 |  * https://github.com/bytedance/UI-TARS-desktop/tree/main/packages/agent-infra/search/browser-search
5 |  */
6 | 
7 | export * from './types.js';
8 | export * from './search.js';


--------------------------------------------------------------------------------
/src/libs/browser-search/queue.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 3 |  * SPDX-License-Identifier: Apache-2.0
 4 |  */
 5 | 
 6 | export interface Task<T> {
 7 |   (): Promise<T>;
 8 | }
 9 | 
10 | export class PromiseQueue {
11 |   private queue: Task<any>[] = [];
12 | 
13 |   private concurrency: number;
14 | 
15 |   private running = 0;
16 | 
17 |   private results: any[] = [];
18 | 
19 |   constructor(concurrency = 1) {
20 |     this.concurrency = concurrency;
21 |   }
22 | 
23 |   add<T>(task: Task<T>): Promise<T> {
24 |     return new Promise((resolve, reject) => {
25 |       this.queue.push(async () => {
26 |         try {
27 |           const result = await task();
28 |           resolve(result);
29 |           return result;
30 |         } catch (error) {
31 |           reject(error);
32 |           throw error;
33 |         }
34 |       });
35 |       this.run();
36 |     });
37 |   }
38 | 
39 |   private async run() {
40 |     if (this.running >= this.concurrency || this.queue.length === 0) {
41 |       return;
42 |     }
43 | 
44 |     this.running++;
45 |     const task = this.queue.shift()!;
46 | 
47 |     try {
48 |       const result = await task();
49 |       this.results.push(result);
50 |     } catch (error) {
51 |       // Handle error if needed
52 |     } finally {
53 |       this.running--;
54 |       this.run();
55 |     }
56 |   }
57 | 
58 |   async waitAll(): Promise<any[]> {
59 |     while (this.running > 0 || this.queue.length > 0) {
60 |       await new Promise((resolve) => setTimeout(resolve, 100));
61 |     }
62 |     return this.results;
63 |   }
64 | }


--------------------------------------------------------------------------------
/src/libs/browser-search/readability.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * PLEASE DO NOT MODIFY IT as it is generated by the build script
 3 |  *
 4 |  * Build: scripts/build-readability.ts
 5 |  * Source: https://github.com/mozilla/readability/blob/main/Readability.js
 6 |  */
 7 | 
 8 | /**
 9 |  * Copyright (c) 2010 Arc90 Inc
10 |  *
11 |  * Licensed under the Apache License, Version 2.0 (the "License");
12 |  * you may not use this file except in compliance with the License.
13 |  * You may obtain a copy of the License at
14 |  *
15 |  *    http://www.apache.org/licenses/LICENSE-2.0
16 |  *
17 |  * Unless required by applicable law or agreed to in writing, software
18 |  * distributed under the License is distributed on an "AS IS" BASIS,
19 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 |  * See the License for the specific language governing permissions and
21 |  * limitations under the License.
22 |  */
23 | 
24 | export const READABILITY_SCRIPT =
25 |   'function q(t,e){if(e&&e.documentElement)t=e,e=arguments[2];else if(!t||!t.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(e=e||{},this._doc=t,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!e.debug,this._maxElemsToParse=e.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=e.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=e.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(e.classesToPreserve||[]),this._keepClasses=!!e.keepClasses,this._serializer=e.serializer||function(i){return i.innerHTML},this._disableJSONLD=!!e.disableJSONLD,this._allowedVideoRegex=e.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let i=function(r){if(r.nodeType==r.TEXT_NODE)return`${r.nodeName} ("${r.textContent}")`;let l=Array.from(r.attributes||[],function(a){return`${a.name}="${a.value}"`}).join(" ");return`<${r.localName} ${l}>`};this.log=function(){if(typeof console!="undefined"){let l=Array.from(arguments,a=>a&&a.nodeType==this.ELEMENT_NODE?i(a):a);l.unshift("Reader: (Readability)"),console.log.apply(console,l)}else if(typeof dump!="undefined"){var r=Array.prototype.map.call(arguments,function(l){return l&&l.nodeName?i(l):l}).join(" ");dump("Reader: (Readability) "+r+`\n`)}}}else 
this.log=function(){}}q.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\\/?)font[^>]*>/gi,normalize:/\\s{2,}/g,videos:/\\/\\/(www\\.)?((dailymotion|youtube|youtube-nocookie|player\\.vimeo|v\\.qq)\\.com|(archive|upload\\.wikimedia)\\.org|player\\.twitch\\.tv)/i,shareElements:/(\\b|_)(share|sharedaddy)(\\b|_)/i,nextLink:/(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))/i,prevLink:/(prev|earl|old|new|<|«)/i,tokenize:/\\W+/g,whitespace:/^\\s*$/,hasContent:/\\S$/,hashUrl:/^#.+/,srcsetUrl:/(\\S+)(\\s+[\\d.]+[xw])?(\\s*(?:,|$))/g,b64DataUrl:/^data:\\s*([^\\s;,]+)\\s*;\\s*base64\\s*,/i,commas:/\\u002C|\\u060C|\\uFE50|\\uFE10|\\uFE11|\\u2E41|\\u2E34|\\u2E32|\\uFF0C/g,jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|Blog
Posting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:\'"\',apos:"\'"},_postProcessContent:function(t){this._fixRelativeUris(t),this._simplifyNestedElements(t),this._keepClasses||this._cleanClasses(t)},_removeNodes:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=t.length-1;i>=0;i--){var r=t[i],l=r.parentNode;l&&(!e||e.call(this,r,i,t))&&l.removeChild(r)}},_replaceNodeTags:function(t,e){if(this._docJSDOMParser&&t._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(let i of t)this._setNodeTag(i,e)},_forEachNode:function(t,e){Array.prototype.forEach.call(t,e,this)},_findNode:function(t,e){return Array.prototype.find.call(t,e,this)},_someNode:function(t,e){return Array.prototype.some.call(t,e,this)},_everyNode:function(t,e){return Array.prototype.every.call(t,e,this)},_concatNodeLists:function(){var t=Array.prototype.slice,e=t.call(arguments),i=e.map(function(r){return t.call(r)});return Array.prototype.concat.apply([],i)},_getAllNodesWithTag:function(t,e){return 
t.querySelectorAll?t.querySelectorAll(e.join(",")):[].concat.apply([],e.map(function(i){var r=t.getElementsByTagName(i);return Array.isArray(r)?r:Array.from(r)}))},_cleanClasses:function(t){var e=this._classesToPreserve,i=(t.getAttribute("class")||"").split(/\\s+/).filter(function(r){return e.indexOf(r)!=-1}).join(" ");for(i?t.setAttribute("class",i):t.removeAttribute("class"),t=t.firstElementChild;t;t=t.nextElementSibling)this._cleanClasses(t)},_fixRelativeUris:function(t){var e=this._doc.baseURI,i=this._doc.documentURI;function r(s){if(e==i&&s.charAt(0)=="#")return s;try{return new URL(s,e).href}catch(h){}return s}var l=this._getAllNodesWithTag(t,["a"]);this._forEachNode(l,function(s){var h=s.getAttribute("href");if(h)if(h.indexOf("javascript:")===0)if(s.childNodes.length===1&&s.childNodes[0].nodeType===this.TEXT_NODE){var c=this._doc.createTextNode(s.textContent);s.parentNode.replaceChild(c,s)}else{for(var n=this._doc.createElement("span");s.firstChild;)n.appendChild(s.firstChild);s.parentNode.replaceChild(n,s)}else s.setAttribute("href",r(h))});var a=this._getAllNodesWithTag(t,["img","picture","figure","video","audio","source"]);this._forEachNode(a,function(s){var h=s.getAttribute("src"),c=s.getAttribute("poster"),n=s.getAttribute("srcset");if(h&&s.setAttribute("src",r(h)),c&&s.setAttribute("poster",r(c)),n){var u=n.replace(this.REGEXPS.srcsetUrl,function(m,b,N,v){return r(b)+(N||"")+v});s.setAttribute("srcset",u)}})},_simplifyNestedElements:function(t){for(var e=t;e;){if(e.parentNode&&["DIV","SECTION"].includes(e.tagName)&&!(e.id&&e.id.startsWith("readability"))){if(this._isElementWithoutContent(e)){e=this._removeAndGetNext(e);continue}else if(this._hasSingleTagInsideElement(e,"DIV")||this._hasSingleTagInsideElement(e,"SECTION")){for(var i=e.children[0],r=0;r<e.attributes.length;r++)i.setAttribute(e.attributes[r].name,e.attributes[r].value);e.parentNode.replaceChild(i,e),e=i;continue}}e=this._getNextNode(e)}},_getArticleTitle:function(){var 
t=this._doc,e="",i="";try{e=i=t.title.trim(),typeof e!="string"&&(e=i=this._getInnerText(t.getElementsByTagName("title")[0]))}catch(u){}var r=!1;function l(u){return u.split(/\\s+/).length}if(/ [\\|\\-\\\\\\/>»] /.test(e))r=/ [\\\\\\/>»] /.test(e),e=i.replace(/(.*)[\\|\\-\\\\\\/>»] .*/gi,"$1"),l(e)<3&&(e=i.replace(/[^\\|\\-\\\\\\/>»]*[\\|\\-\\\\\\/>»](.*)/gi,"$1"));else if(e.indexOf(": ")!==-1){var a=this._concatNodeLists(t.getElementsByTagName("h1"),t.getElementsByTagName("h2")),s=e.trim(),h=this._someNode(a,function(u){return u.textContent.trim()===s});h||(e=i.substring(i.lastIndexOf(":")+1),l(e)<3?e=i.substring(i.indexOf(":")+1):l(i.substr(0,i.indexOf(":")))>5&&(e=i))}else if(e.length>150||e.length<15){var c=t.getElementsByTagName("h1");c.length===1&&(e=this._getInnerText(c[0]))}e=e.trim().replace(this.REGEXPS.normalize," ");var n=l(e);return n<=4&&(!r||n!=l(i.replace(/[\\|\\-\\\\\\/>»]+/g,""))-1)&&(e=i),e},_prepDocument:function(){var t=this._doc;this._removeNodes(this._getAllNodesWithTag(t,["style"])),t.body&&this._replaceBrs(t.body),this._replaceNodeTags(this._getAllNodesWithTag(t,["font"]),"SPAN")},_nextNode:function(t){for(var e=t;e&&e.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(e.textContent);)e=e.nextSibling;return e},_replaceBrs:function(t){this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(e){for(var i=e.nextSibling,r=!1;(i=this._nextNode(i))&&i.tagName=="BR";){r=!0;var l=i.nextSibling;i.parentNode.removeChild(i),i=l}if(r){var a=this._doc.createElement("p");for(e.parentNode.replaceChild(a,e),i=a.nextSibling;i;){if(i.tagName=="BR"){var s=this._nextNode(i.nextSibling);if(s&&s.tagName=="BR")break}if(!this._isPhrasingContent(i))break;var h=i.nextSibling;a.appendChild(i),i=h}for(;a.lastChild&&this._isWhitespace(a.lastChild);)a.removeChild(a.lastChild);a.parentNode.tagName==="P"&&this._setNodeTag(a.parentNode,"DIV")}})},_setNodeTag:function(t,e){if(this.log("_setNodeTag",t,e),this._docJSDOMParser)return 
t.localName=e.toLowerCase(),t.tagName=e.toUpperCase(),t;for(var i=t.ownerDocument.createElement(e);t.firstChild;)i.appendChild(t.firstChild);t.parentNode.replaceChild(i,t),t.readability&&(i.readability=t.readability);for(var r=0;r<t.attributes.length;r++)try{i.setAttribute(t.attributes[r].name,t.attributes[r].value)}catch(l){}return i},_prepArticle:function(t){this._cleanStyles(t),this._markDataTables(t),this._fixLazyImages(t),this._cleanConditionally(t,"form"),this._cleanConditionally(t,"fieldset"),this._clean(t,"object"),this._clean(t,"embed"),this._clean(t,"footer"),this._clean(t,"link"),this._clean(t,"aside");var e=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(t.children,function(i){this._cleanMatchedNodes(i,function(r,l){return this.REGEXPS.shareElements.test(l)&&r.textContent.length<e})}),this._clean(t,"iframe"),this._clean(t,"input"),this._clean(t,"textarea"),this._clean(t,"select"),this._clean(t,"button"),this._cleanHeaders(t),this._cleanConditionally(t,"table"),this._cleanConditionally(t,"ul"),this._cleanConditionally(t,"div"),this._replaceNodeTags(this._getAllNodesWithTag(t,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(t,["p"]),function(i){var r=i.getElementsByTagName("img").length,l=i.getElementsByTagName("embed").length,a=i.getElementsByTagName("object").length,s=i.getElementsByTagName("iframe").length,h=r+l+a+s;return h===0&&!this._getInnerText(i,!1)}),this._forEachNode(this._getAllNodesWithTag(t,["br"]),function(i){var r=this._nextNode(i.nextSibling);r&&r.tagName=="P"&&i.parentNode.removeChild(i)}),this._forEachNode(this._getAllNodesWithTag(t,["table"]),function(i){var r=this._hasSingleTagInsideElement(i,"TBODY")?i.firstElementChild:i;if(this._hasSingleTagInsideElement(r,"TR")){var l=r.firstElementChild;if(this._hasSingleTagInsideElement(l,"TD")){var 
a=l.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),i.parentNode.replaceChild(a,i)}}})},_initializeNode:function(t){switch(t.readability={contentScore:0},t.tagName){case"DIV":t.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":t.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":t.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":t.readability.contentScore-=5;break}t.readability.contentScore+=this._getClassWeight(t)},_removeAndGetNext:function(t){var e=this._getNextNode(t,!0);return t.parentNode.removeChild(t),e},_getNextNode:function(t,e){if(!e&&t.firstElementChild)return t.firstElementChild;if(t.nextElementSibling)return t.nextElementSibling;do t=t.parentNode;while(t&&!t.nextElementSibling);return t&&t.nextElementSibling},_textSimilarity:function(t,e){var i=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),r=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);if(!i.length||!r.length)return 0;var l=r.filter(s=>!i.includes(s)),a=l.join(" ").length/r.join(" ").length;return 1-a},_checkByline:function(t,e){if(this._articleByline)return!1;if(t.getAttribute!==void 0)var i=t.getAttribute("rel"),r=t.getAttribute("itemprop");return(i==="author"||r&&r.indexOf("author")!==-1||this.REGEXPS.byline.test(e))&&this._isValidByline(t.textContent)?(this._articleByline=t.textContent.trim(),!0):!1},_getNodeAncestors:function(t,e){e=e||0;for(var i=0,r=[];t.parentNode&&(r.push(t.parentNode),!(e&&++i===e));)t=t.parentNode;return r},_grabArticle:function(t){this.log("**** grabArticle ****");var e=this._doc,i=t!==null;if(t=t||this._doc.body,!t)return this.log("No body found in document. 
Abort."),null;for(var r=t.innerHTML;;){this.log("Starting grabArticle loop");var l=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),a=[],s=this._doc.documentElement;let J=!0;for(;s;){s.tagName==="HTML"&&(this._articleLang=s.getAttribute("lang"));var h=s.className+" "+s.id;if(!this._isProbablyVisible(s)){this.log("Removing hidden node - "+h),s=this._removeAndGetNext(s);continue}if(s.getAttribute("aria-modal")=="true"&&s.getAttribute("role")=="dialog"){s=this._removeAndGetNext(s);continue}if(this._checkByline(s,h)){s=this._removeAndGetNext(s);continue}if(J&&this._headerDuplicatesTitle(s)){this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),J=!1,s=this._removeAndGetNext(s);continue}if(l){if(this.REGEXPS.unlikelyCandidates.test(h)&&!this.REGEXPS.okMaybeItsACandidate.test(h)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&s.tagName!=="BODY"&&s.tagName!=="A"){this.log("Removing unlikely candidate - "+h),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+h),s=this._removeAndGetNext(s);continue}}if((s.tagName==="DIV"||s.tagName==="SECTION"||s.tagName==="HEADER"||s.tagName==="H1"||s.tagName==="H2"||s.tagName==="H3"||s.tagName==="H4"||s.tagName==="H5"||s.tagName==="H6")&&this._isElementWithoutContent(s)){s=this._removeAndGetNext(s);continue}if(this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)!==-1&&a.push(s),s.tagName==="DIV"){for(var c=null,n=s.firstChild;n;){var u=n.nextSibling;if(this._isPhrasingContent(n))c!==null?c.appendChild(n):this._isWhitespace(n)||(c=e.createElement("p"),s.replaceChild(c,n),c.appendChild(n));else if(c!==null){for(;c.lastChild&&this._isWhitespace(c.lastChild);)c.removeChild(c.lastChild);c=null}n=u}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var m=s.children[0];s.parentNode.replaceChild(m,s),s=m,a.push(s)}else 
this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),a.push(s))}s=this._getNextNode(s)}var b=[];this._forEachNode(a,function(A){if(!(!A.parentNode||typeof A.parentNode.tagName=="undefined")){var T=this._getInnerText(A);if(!(T.length<25)){var K=this._getNodeAncestors(A,5);if(K.length!==0){var C=0;C+=1,C+=T.split(this.REGEXPS.commas).length,C+=Math.min(Math.floor(T.length/100),3),this._forEachNode(K,function(S,F){if(!(!S.tagName||!S.parentNode||typeof S.parentNode.tagName=="undefined")){if(typeof S.readability=="undefined"&&(this._initializeNode(S),b.push(S)),F===0)var X=1;else F===1?X=2:X=F*3;S.readability.contentScore+=C/X}})}}}});for(var N=[],v=0,y=b.length;v<y;v+=1){var E=b[v],d=E.readability.contentScore*(1-this._getLinkDensity(E));E.readability.contentScore=d,this.log("Candidate:",E,"with score "+d);for(var p=0;p<this._nbTopCandidates;p++){var x=N[p];if(!x||d>x.readability.contentScore){N.splice(p,0,E),N.length>this._nbTopCandidates&&N.pop();break}}}var o=N[0]||null,L=!1,g;if(o===null||o.tagName==="BODY"){for(o=e.createElement("DIV"),L=!0;t.firstChild;)this.log("Moving child out:",t.firstChild),o.appendChild(t.firstChild);t.appendChild(o),this._initializeNode(o)}else if(o){for(var I=[],P=1;P<N.length;P++)N[P].readability.contentScore/o.readability.contentScore>=.75&&I.push(this._getNodeAncestors(N[P]));var O=3;if(I.length>=O)for(g=o.parentNode;g.tagName!=="BODY";){for(var G=0,H=0;H<I.length&&G<O;H++)G+=Number(I[H].includes(g));if(G>=O){o=g;break}g=g.parentNode}o.readability||this._initializeNode(o),g=o.parentNode;for(var M=o.readability.contentScore,Q=M/3;g.tagName!=="BODY";){if(!g.readability){g=g.parentNode;continue}var V=g.readability.contentScore;if(V<Q)break;if(V>M){o=g;break}M=g.readability.contentScore,g=g.parentNode}for(g=o.parentNode;g.tagName!="BODY"&&g.children.length==1;)o=g,g=o.parentNode;o.readability||this._initializeNode(o)}var _=e.createElement("DIV");i&&(_.id="readability-content");var 
Z=Math.max(10,o.readability.contentScore*.2);g=o.parentNode;for(var U=g.children,w=0,j=U.length;w<j;w++){var f=U[w],R=!1;if(this.log("Looking at sibling node:",f,f.readability?"with score "+f.readability.contentScore:""),this.log("Sibling has score",f.readability?f.readability.contentScore:"Unknown"),f===o)R=!0;else{var $=0;if(f.className===o.className&&o.className!==""&&($+=o.readability.contentScore*.2),f.readability&&f.readability.contentScore+$>=Z)R=!0;else if(f.nodeName==="P"){var Y=this._getLinkDensity(f),z=this._getInnerText(f),k=z.length;(k>80&&Y<.25||k<80&&k>0&&Y===0&&z.search(/\\.( |$)/)!==-1)&&(R=!0)}}R&&(this.log("Appending node:",f),this.ALTER_TO_DIV_EXCEPTIONS.indexOf(f.nodeName)===-1&&(this.log("Altering sibling:",f,"to div."),f=this._setNodeTag(f,"DIV")),_.appendChild(f),U=g.children,w-=1,j-=1)}if(this._debug&&this.log("Article content pre-prep: "+_.innerHTML),this._prepArticle(_),this._debug&&this.log("Article content post-prep: "+_.innerHTML),L)o.id="readability-page-1",o.className="page";else{var B=e.createElement("DIV");for(B.id="readability-page-1",B.className="page";_.firstChild;)B.appendChild(_.firstChild);_.appendChild(B)}this._debug&&this.log("Article content after paging: "+_.innerHTML);var W=!0,D=this._getInnerText(_,!0).length;if(D<this._charThreshold)if(W=!1,t.innerHTML=r,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:_,textLength:D});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:_,textLength:D});else{if(this._attempts.push({articleContent:_,textLength:D}),this._attempts.sort(function(A,T){return T.textLength-A.textLength}),!this._attempts[0].textLength)return null;_=this._attempts[0].articleContent,W=!0}if(W){var 
tt=[g,o].concat(this._getNodeAncestors(g));return this._someNode(tt,function(A){if(!A.tagName)return!1;var T=A.getAttribute("dir");return T?(this._articleDir=T,!0):!1}),_}}},_isValidByline:function(t){return typeof t=="string"||t instanceof String?(t=t.trim(),t.length>0&&t.length<100):!1},_unescapeHtmlEntities:function(t){if(!t)return t;var e=this.HTML_ESCAPE_MAP;return t.replace(/&(quot|amp|apos|lt|gt);/g,function(i,r){return e[r]}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,function(i,r,l){var a=parseInt(r||l,r?16:10);return String.fromCharCode(a)})},_getJSONLD:function(t){var e=this._getAllNodesWithTag(t,["script"]),i;return this._forEachNode(e,function(r){if(!i&&r.getAttribute("type")==="application/ld+json")try{var l=r.textContent.replace(/^\\s*<!\\[CDATA\\[|\\]\\]>\\s*$/g,""),a=JSON.parse(l);if(!a["@context"]||!a["@context"].match(/^https?\\:\\/\\/schema\\.org$/)||(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find(function(n){return(n["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)})),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes)))return;if(i={},typeof a.name=="string"&&typeof a.headline=="string"&&a.name!==a.headline){var s=this._getArticleTitle(),h=this._textSimilarity(a.name,s)>.75,c=this._textSimilarity(a.headline,s)>.75;c&&!h?i.title=a.headline:i.title=a.name}else typeof a.name=="string"?i.title=a.name.trim():typeof a.headline=="string"&&(i.title=a.headline.trim());a.author&&(typeof a.author.name=="string"?i.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&typeof a.author[0].name=="string"&&(i.byline=a.author.filter(function(n){return n&&typeof n.name=="string"}).map(function(n){return n.name.trim()}).join(", "))),typeof a.description=="string"&&(i.excerpt=a.description.trim()),a.publisher&&typeof a.publisher.name=="string"&&(i.siteName=a.publisher.name.trim()),typeof 
a.datePublished=="string"&&(i.datePublished=a.datePublished.trim());return}catch(n){this.log(n.message)}}),i||{}},_getArticleMetadata:function(t){var e={},i={},r=this._doc.getElementsByTagName("meta"),l=/\\s*(article|dc|dcterm|og|twitter)\\s*:\\s*(author|creator|description|published_time|title|site_name)\\s*/gi,a=/^\\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\\s*[\\.:]\\s*)?(author|creator|description|title|site_name)\\s*$/i;return this._forEachNode(r,function(s){var h=s.getAttribute("name"),c=s.getAttribute("property"),n=s.getAttribute("content");if(n){var u=null,m=null;c&&(u=c.match(l),u&&(m=u[0].toLowerCase().replace(/\\s/g,""),i[m]=n.trim())),!u&&h&&a.test(h)&&(m=h,n&&(m=m.toLowerCase().replace(/\\s/g,"").replace(/\\./g,":"),i[m]=n.trim()))}}),e.title=t.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],e.title||(e.title=this._getArticleTitle()),e.byline=t.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,e.excerpt=t.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],e.siteName=t.siteName||i["og:site_name"],e.publishedTime=t.datePublished||i["article:published_time"]||null,e.title=this._unescapeHtmlEntities(e.title),e.byline=this._unescapeHtmlEntities(e.byline),e.excerpt=this._unescapeHtmlEntities(e.excerpt),e.siteName=this._unescapeHtmlEntities(e.siteName),e.publishedTime=this._unescapeHtmlEntities(e.publishedTime),e},_isSingleImage:function(t){return t.tagName==="IMG"?!0:t.children.length!==1||t.textContent.trim()!==""?!1:this._isSingleImage(t.children[0])},_unwrapNoscriptImages:function(t){var e=Array.from(t.getElementsByTagName("img"));this._forEachNode(e,function(r){for(var l=0;l<r.attributes.length;l++){var 
a=r.attributes[l];switch(a.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\\.(jpg|jpeg|png|webp)/i.test(a.value))return}r.parentNode.removeChild(r)});var i=Array.from(t.getElementsByTagName("noscript"));this._forEachNode(i,function(r){var l=t.createElement("div");if(l.innerHTML=r.innerHTML,!!this._isSingleImage(l)){var a=r.previousElementSibling;if(a&&this._isSingleImage(a)){var s=a;s.tagName!=="IMG"&&(s=a.getElementsByTagName("img")[0]);for(var h=l.getElementsByTagName("img")[0],c=0;c<s.attributes.length;c++){var n=s.attributes[c];if(n.value!==""&&(n.name==="src"||n.name==="srcset"||/\\.(jpg|jpeg|png|webp)/i.test(n.value))){if(h.getAttribute(n.name)===n.value)continue;var u=n.name;h.hasAttribute(u)&&(u="data-old-"+u),h.setAttribute(u,n.value)}}r.parentNode.replaceChild(l.firstElementChild,a)}}})},_removeScripts:function(t){this._removeNodes(this._getAllNodesWithTag(t,["script","noscript"]))},_hasSingleTagInsideElement:function(t,e){return t.children.length!=1||t.children[0].tagName!==e?!1:!this._someNode(t.childNodes,function(i){return i.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(i.textContent)})},_isElementWithoutContent:function(t){return t.nodeType===this.ELEMENT_NODE&&t.textContent.trim().length==0&&(t.children.length==0||t.children.length==t.getElementsByTagName("br").length+t.getElementsByTagName("hr").length)},_hasChildBlockElement:function(t){return this._someNode(t.childNodes,function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)})},_isPhrasingContent:function(t){return t.nodeType===this.TEXT_NODE||this.PHRASING_ELEMS.indexOf(t.tagName)!==-1||(t.tagName==="A"||t.tagName==="DEL"||t.tagName==="INS")&&this._everyNode(t.childNodes,this._isPhrasingContent)},_isWhitespace:function(t){return t.nodeType===this.TEXT_NODE&&t.textContent.trim().length===0||t.nodeType===this.ELEMENT_NODE&&t.tagName==="BR"},_getInnerText:function(t,e){e=typeof e=="undefined"?!0:e;var i=t.textContent.trim();return 
e?i.replace(this.REGEXPS.normalize," "):i},_getCharCount:function(t,e){return e=e||",",this._getInnerText(t).split(e).length-1},_cleanStyles:function(t){if(!(!t||t.tagName.toLowerCase()==="svg")){for(var e=0;e<this.PRESENTATIONAL_ATTRIBUTES.length;e++)t.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[e]);this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(t.tagName)!==-1&&(t.removeAttribute("width"),t.removeAttribute("height"));for(var i=t.firstElementChild;i!==null;)this._cleanStyles(i),i=i.nextElementSibling}},_getLinkDensity:function(t){var e=this._getInnerText(t).length;if(e===0)return 0;var i=0;return this._forEachNode(t.getElementsByTagName("a"),function(r){var l=r.getAttribute("href"),a=l&&this.REGEXPS.hashUrl.test(l)?.3:1;i+=this._getInnerText(r).length*a}),i/e},_getClassWeight:function(t){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var e=0;return typeof t.className=="string"&&t.className!==""&&(this.REGEXPS.negative.test(t.className)&&(e-=25),this.REGEXPS.positive.test(t.className)&&(e+=25)),typeof t.id=="string"&&t.id!==""&&(this.REGEXPS.negative.test(t.id)&&(e-=25),this.REGEXPS.positive.test(t.id)&&(e+=25)),e},_clean:function(t,e){var i=["object","embed","iframe"].indexOf(e)!==-1;this._removeNodes(this._getAllNodesWithTag(t,[e]),function(r){if(i){for(var l=0;l<r.attributes.length;l++)if(this._allowedVideoRegex.test(r.attributes[l].value))return!1;if(r.tagName==="object"&&this._allowedVideoRegex.test(r.innerHTML))return!1}return!0})},_hasAncestorTag:function(t,e,i,r){i=i||3,e=e.toUpperCase();for(var l=0;t.parentNode;){if(i>0&&l>i)return!1;if(t.parentNode.tagName===e&&(!r||r(t.parentNode)))return!0;t=t.parentNode,l++}return!1},_getRowAndColumnCount:function(t){for(var e=0,i=0,r=t.getElementsByTagName("tr"),l=0;l<r.length;l++){var a=r[l].getAttribute("rowspan")||0;a&&(a=parseInt(a,10)),e+=a||1;for(var s=0,h=r[l].getElementsByTagName("td"),c=0;c<h.length;c++){var 
n=h[c].getAttribute("colspan")||0;n&&(n=parseInt(n,10)),s+=n||1}i=Math.max(i,s)}return{rows:e,columns:i}},_markDataTables:function(t){for(var e=t.getElementsByTagName("table"),i=0;i<e.length;i++){var r=e[i],l=r.getAttribute("role");if(l=="presentation"){r._readabilityDataTable=!1;continue}var a=r.getAttribute("datatable");if(a=="0"){r._readabilityDataTable=!1;continue}var s=r.getAttribute("summary");if(s){r._readabilityDataTable=!0;continue}var h=r.getElementsByTagName("caption")[0];if(h&&h.childNodes.length>0){r._readabilityDataTable=!0;continue}var c=["col","colgroup","tfoot","thead","th"],n=function(m){return!!r.getElementsByTagName(m)[0]};if(c.some(n)){this.log("Data table because found data-y descendant"),r._readabilityDataTable=!0;continue}if(r.getElementsByTagName("table")[0]){r._readabilityDataTable=!1;continue}var u=this._getRowAndColumnCount(r);if(u.rows>=10||u.columns>4){r._readabilityDataTable=!0;continue}r._readabilityDataTable=u.rows*u.columns>10}},_fixLazyImages:function(t){this._forEachNode(this._getAllNodesWithTag(t,["img","picture","figure"]),function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){var i=this.REGEXPS.b64DataUrl.exec(e.src);if(i[1]==="image/svg+xml")return;for(var r=!1,l=0;l<e.attributes.length;l++){var a=e.attributes[l];if(a.name!=="src"&&/\\.(jpg|jpeg|png|webp)/i.test(a.value)){r=!0;break}}if(r){var s=e.src.search(/base64\\s*/i)+7,h=e.src.length-s;h<133&&e.removeAttribute("src")}}if(!((e.src||e.srcset&&e.srcset!="null")&&e.className.toLowerCase().indexOf("lazy")===-1)){for(var c=0;c<e.attributes.length;c++)if(a=e.attributes[c],!(a.name==="src"||a.name==="srcset"||a.name==="alt")){var n=null;if(/\\.(jpg|jpeg|png|webp)\\s+\\d/.test(a.value)?n="srcset":/^\\s*\\S+\\.(jpg|jpeg|png|webp)\\S*\\s*$/.test(a.value)&&(n="src"),n){if(e.tagName==="IMG"||e.tagName==="PICTURE")e.setAttribute(n,a.value);else if(e.tagName==="FIGURE"&&!this._getAllNodesWithTag(e,["img","picture"]).length){var 
u=this._doc.createElement("img");u.setAttribute(n,a.value),e.appendChild(u)}}}}})},_getTextDensity:function(t,e){var i=this._getInnerText(t,!0).length;if(i===0)return 0;var r=0,l=this._getAllNodesWithTag(t,e);return this._forEachNode(l,a=>r+=this._getInnerText(a,!0).length),r/i},_cleanConditionally:function(t,e){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(t,[e]),function(i){var r=function(g){return g._readabilityDataTable},l=e==="ul"||e==="ol";if(!l){var a=0,s=this._getAllNodesWithTag(i,["ul","ol"]);this._forEachNode(s,g=>a+=this._getInnerText(g).length),l=a/this._getInnerText(i).length>.9}if(e==="table"&&r(i)||this._hasAncestorTag(i,"table",-1,r)||this._hasAncestorTag(i,"code"))return!1;var h=this._getClassWeight(i);this.log("Cleaning Conditionally",i);var c=0;if(h+c<0)return!0;if(this._getCharCount(i,",")<10){for(var n=i.getElementsByTagName("p").length,u=i.getElementsByTagName("img").length,m=i.getElementsByTagName("li").length-100,b=i.getElementsByTagName("input").length,N=this._getTextDensity(i,["h1","h2","h3","h4","h5","h6"]),v=0,y=this._getAllNodesWithTag(i,["object","embed","iframe"]),E=0;E<y.length;E++){for(var d=0;d<y[E].attributes.length;d++)if(this._allowedVideoRegex.test(y[E].attributes[d].value))return!1;if(y[E].tagName==="object"&&this._allowedVideoRegex.test(y[E].innerHTML))return!1;v++}var p=this._getLinkDensity(i),x=this._getInnerText(i).length,o=u>1&&n/u<.5&&!this._hasAncestorTag(i,"figure")||!l&&m>n||b>Math.floor(n/3)||!l&&N<.9&&x<25&&(u===0||u>2)&&!this._hasAncestorTag(i,"figure")||!l&&h<25&&p>.2||h>=25&&p>.5||v===1&&x<75||v>1;if(l&&o){for(var L=0;L<i.children.length;L++)if(i.children[L].children.length>1)return o;let g=i.getElementsByTagName("li").length;if(u==g)return!1}return o}return!1})},_cleanMatchedNodes:function(t,e){for(var i=this._getNextNode(t,!0),r=this._getNextNode(t);r&&r!=i;)e.call(this,r,r.className+" 
"+r.id)?r=this._removeAndGetNext(r):r=this._getNextNode(r)},_cleanHeaders:function(t){let e=this._getAllNodesWithTag(t,["h1","h2"]);this._removeNodes(e,function(i){let r=this._getClassWeight(i)<0;return r&&this.log("Removing header with low class weight:",i),r})},_headerDuplicatesTitle:function(t){if(t.tagName!="H1"&&t.tagName!="H2")return!1;var e=this._getInnerText(t,!1);return this.log("Evaluating similarity of header:",e,this._articleTitle),this._textSimilarity(this._articleTitle,e)>.75},_flagIsActive:function(t){return(this._flags&t)>0},_removeFlag:function(t){this._flags=this._flags&~t},_isProbablyVisible:function(t){return(!t.style||t.style.display!="none")&&(!t.style||t.style.visibility!="hidden")&&!t.hasAttribute("hidden")&&(!t.hasAttribute("aria-hidden")||t.getAttribute("aria-hidden")!="true"||t.className&&t.className.indexOf&&t.className.indexOf("fallback-image")!==-1)},parse:function(){if(this._maxElemsToParse>0){var t=this._doc.getElementsByTagName("*").length;if(t>this._maxElemsToParse)throw new Error("Aborting parsing document; "+t+" elements found")}this._unwrapNoscriptImages(this._doc);var e=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(e);this._articleTitle=i.title;var r=this._grabArticle();if(!r)return null;if(this.log("Grabbed: "+r.innerHTML),this._postProcessContent(r),!i.excerpt){var l=r.getElementsByTagName("p");l.length>0&&(i.excerpt=l[0].textContent.trim())}var a=r.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(r),textContent:a,length:a.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}};typeof module=="object"&&(module.exports=q);\n';


--------------------------------------------------------------------------------
/src/libs/browser-search/search.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  3 |  * SPDX-License-Identifier: Apache-2.0
  4 |  */
  5 | import { LocalBrowser, type BrowserInterface } from '../browser/index.js';
  6 | import { READABILITY_SCRIPT } from './readability.js';
  7 | import { Logger, defaultLogger } from '@agent-infra/logger';
  8 | import {
  9 |   extractPageInformation,
 10 |   toMarkdown,
 11 | } from './utils.js';
 12 | import { PromiseQueue } from './queue.js';
 13 | import { shouldSkipDomain, interceptRequest } from './utils.js';
 14 | import { getSearchEngine } from './engines/index.js';
 15 | import type {
 16 |   SearchResult,
 17 |   BrowserSearchOptions,
 18 |   BrowserSearchConfig,
 19 |   LocalBrowserSearchEngine,
 20 | } from './types.js';
 21 | 
 22 | /**
 23 |  * Service class for performing web searches and content extraction.
 24 |  *
 25 |  * A search loads the engine's result page in the browser, collects the result
 26 |  * links and, when `needVisitedUrls` is set, opens every link to extract its
 27 |  * content as Markdown. The browser is launched on first use and reused until
 28 |  * it is closed.
 29 |  */
 30 | export class BrowserSearch {
 31 |   private logger: Logger;
 32 |   private browser: BrowserInterface;
 33 |   /** True while a browser launched by this instance has not been closed. */
 34 |   private isBrowserOpen = false;
 35 |   private defaultEngine: LocalBrowserSearchEngine;
 36 | 
 37 |   /**
 38 |    * @param config Optional settings. Missing values fall back to
 39 |    *   `defaultLogger`, a new `LocalBrowser` and the `'bing'` engine.
 40 |    */
 41 |   constructor(private config: BrowserSearchConfig = {}) {
 42 |     this.logger = config?.logger ?? defaultLogger;
 43 |     this.browser = config.browser ?? new LocalBrowser({ logger: this.logger });
 44 |     this.defaultEngine = config.defaultEngine ?? 'bing';
 45 |   }
 46 | 
 47 |   /**
 48 |    * Search web and extract content from result pages.
 49 |    *
 50 |    * Errors raised while searching are logged and produce an empty array
 51 |    * instead of a rejection. Unless `options.keepBrowserOpen` is set, the
 52 |    * browser is closed before returning.
 53 |    *
 54 |    * @param options A query or list of queries plus search settings.
 55 |    * @returns The results of all queries, flattened into a single array.
 56 |    */
 57 |   async perform(options: BrowserSearchOptions) {
 58 |     this.logger.info('Starting search with options:', options);
 59 | 
 60 |     const queries = Array.isArray(options.query)
 61 |       ? options.query
 62 |       : [options.query];
 63 |     const excludeDomains = options.excludeDomains || [];
 64 |     // Split the requested total across the queries, asking for at least 3
 65 |     // results per query. A falsy count is passed through unchanged.
 66 |     const count =
 67 |       options.count && Math.max(3, Math.floor(options.count / queries.length));
 68 |     const engine = options.engine || this.defaultEngine;
 69 | 
 70 |     try {
 71 |       if (!this.isBrowserOpen) {
 72 |         this.logger.info('Launching browser');
 73 |         await this.browser.launch(this.config.browserOptions);
 74 |         this.isBrowserOpen = true;
 75 |       } else {
 76 |         this.logger.info('Using existing browser instance');
 77 |       }
 78 | 
 79 |       const queue = new PromiseQueue(options.concurrency || 15);
 80 |       // Shared by all queries so that a URL is only returned once.
 81 |       const visitedUrls = new Set<string>();
 82 |       const results = await Promise.all(
 83 |         queries.map((query) =>
 84 |           this.search(this.browser, {
 85 |             query,
 86 |             count,
 87 |             queue,
 88 |             visitedUrls,
 89 |             excludeDomains,
 90 |             truncate: options.truncate,
 91 |             needVisitedUrls: options.needVisitedUrls,
 92 |             engine,
 93 |           }),
 94 |         ),
 95 |       );
 96 | 
 97 |       this.logger.success('Search completed successfully');
 98 |       return results.flat();
 99 |     } catch (error) {
100 |       this.logger.error('Search failed:', error);
101 |       return [];
102 |     } finally {
103 |       if (!options.keepBrowserOpen && this.isBrowserOpen) {
104 |         try {
105 |           await this.closeBrowser();
106 |         } catch (error) {
107 |           // A throw from `finally` would replace the value returned above,
108 |           // so a cleanup failure is logged instead of propagated.
109 |           this.logger.error('Failed to close browser:', error);
110 |         }
111 |       }
112 |     }
113 |   }
114 | 
115 |   /**
116 |    * Explicitly close the browser instance. Does nothing when no browser is
117 |    * open. A failure of the underlying `close()` call is rethrown.
118 |    */
119 |   async closeBrowser(): Promise<void> {
120 |     if (!this.isBrowserOpen) return;
121 | 
122 |     this.logger.info('Closing browser');
123 |     try {
124 |       await this.browser.close();
125 |     } finally {
126 |       // Reset the flag even when close() throws, so the next search launches
127 |       // the browser again instead of reusing one in an unknown state.
128 |       this.isBrowserOpen = false;
129 |     }
130 |   }
131 | 
132 |   /**
133 |    * Run a single query on the search engine and collect its results.
134 |    *
135 |    * @param browser Browser used for the result page and the linked pages.
136 |    * @param options Settings for this query; `queue` and `visitedUrls` are
137 |    *   shared between the queries of one `perform()` call.
138 |    * @returns The result links, replaced by the extracted page data when
139 |    *   `needVisitedUrls` is set. Links whose visit failed are omitted.
140 |    */
141 |   private async search(
142 |     browser: BrowserInterface,
143 |     options: {
144 |       query: string;
145 |       count?: number;
146 |       needVisitedUrls?: boolean;
147 |       excludeDomains: string[];
148 |       queue: PromiseQueue;
149 |       visitedUrls: Set<string>;
150 |       truncate?: number;
151 |       engine: LocalBrowserSearchEngine;
152 |     },
153 |   ) {
154 |     const searchEngine = getSearchEngine(options.engine);
155 |     const url = searchEngine.getSearchUrl(options.query, {
156 |       count: options.count,
157 |       excludeDomains: options.excludeDomains,
158 |     });
159 | 
160 |     this.logger.info(`Searching with ${options.engine} engine: ${url}`);
161 | 
162 |     const found = await browser.evaluateOnNewPage({
163 |       url,
164 |       waitForOptions: {
165 |         waitUntil: 'networkidle2',
166 |       },
167 |       pageFunction: searchEngine.extractSearchResults,
168 |       pageFunctionParams: [],
169 |       beforePageLoad: async (page) => {
170 |         await interceptRequest(page);
171 |       },
172 |       afterPageLoad: async (page) => {
173 |         // Optional engine hook; skipped when the engine does not define it.
174 |         await searchEngine.waitForSearchResults?.(page, 10000);
175 |       },
176 |     });
177 | 
178 |     this.logger.info(`Fetched ${found?.length ?? 0} links`);
179 | 
180 |     // Keep each URL once across all queries and drop skipped domains.
181 |     const links = (found ?? []).filter((link) => {
182 |       if (options.visitedUrls.has(link.url)) return false;
183 |       options.visitedUrls.add(link.url);
184 |       return !shouldSkipDomain(link.url);
185 |     });
186 | 
187 |     if (!links.length) {
188 |       this.logger.info('No valid links found');
189 |       return [];
190 |     }
191 | 
192 |     // Visit each link and extract content, or keep the plain links when page
193 |     // visits were not requested. The visit uses the `browser` passed to this
194 |     // call rather than `this.browser`.
195 |     const settled = await Promise.allSettled(
196 |       options.needVisitedUrls
197 |         ? links.map((item) =>
198 |             options.queue.add(() => this.visitLink(browser, item)),
199 |           )
200 |         : links,
201 |     );
202 | 
203 |     const { truncate } = options;
204 |     return settled
205 |       .map((outcome) => {
206 |         if (outcome.status === 'rejected') {
207 |           this.logger.error('Failed to process link:', outcome.reason);
208 |           return null;
209 |         }
210 |         const item = outcome.value;
211 |         if (!item) return null;
212 | 
213 |         return {
214 |           ...item,
215 |           content: truncate ? item.content.slice(0, truncate) : item.content,
216 |         };
217 |       })
218 |       .filter((v): v is SearchResult => v !== null);
219 |   }
220 | 
221 |   /**
222 |    * Open a result link and extract its content, converted with `toMarkdown`.
223 |    *
224 |    * @param browser Browser used to load the page.
225 |    * @param item Search result whose `url` is visited.
226 |    * @returns The extracted page data combined with `item`'s URL and snippet,
227 |    *   or `undefined` when nothing was extracted or the visit failed (the
228 |    *   error is logged).
229 |    */
230 |   private async visitLink(
231 |     browser: BrowserInterface,
232 |     item: SearchResult,
233 |   ): Promise<SearchResult | undefined> {
234 |     try {
235 |       this.logger.info('Visiting link:', item.url);
236 | 
237 |       const result = await browser.evaluateOnNewPage({
238 |         url: item.url,
239 |         pageFunction: extractPageInformation,
240 |         pageFunctionParams: [READABILITY_SCRIPT],
241 |         beforePageLoad: async (page) => {
242 |           await interceptRequest(page);
243 |         },
244 |       });
245 | 
246 |       if (result) {
247 |         const content = toMarkdown(result.content);
248 |         return { ...result, url: item.url, content, snippet: item.snippet };
249 |       }
250 |     } catch (e) {
251 |       this.logger.error('Failed to visit link:', e);
252 |     }
253 |     return undefined;
254 |   }
255 | }
199 | 
// Ambient augmentation: adds a `Readability` member to the global `Window` type
// so that `window.Readability` type-checks inside this project.
declare global {
  interface Window {
    // NOTE(review): presumably the Readability constructor made available in the page — confirm.
    Readability: any;
  }
}
205 | 


--------------------------------------------------------------------------------
/src/libs/browser-search/types.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 3 |  * SPDX-License-Identifier: Apache-2.0
 4 |  */
 5 | import { BrowserInterface, LaunchOptions, Page } from '../browser/types.js';
 6 | import { Logger } from '@agent-infra/logger';
 7 | 
/**
 * A single search hit.
 */
export type SearchResult = {
  /** Title of the hit */
  title: string;
  /** URL of the hit */
  url: string;
  /** Short excerpt associated with the hit */
  snippet: string;
  /** Content of the hit; Markdown when the link has been visited */
  content: string;
};
14 | 
/** Identifiers of the search engines that can be selected for a browser search. */
export type LocalBrowserSearchEngine = 'bing' | 'baidu' | 'sogou' | 'google';
16 | 
/**
 * Options accepted by a single browser search call.
 */
export interface BrowserSearchOptions {
  /**
   * Search query, or a list of queries
   */
  query: string | string[];
  /**
   * Maximum number of results to return
   */
  count?: number;
  /**
   * Concurrency of the search
   * NOTE(review): presumably the number of pages processed in parallel — confirm against the caller.
   */
  concurrency?: number;
  /**
   * Domains to exclude from the search
   */
  excludeDomains?: string[];
  /**
   * Maximum content length to keep; the rest of the content is truncated
   */
  truncate?: number;
  /**
   * Whether to keep the browser open after the search has finished
   */
  keepBrowserOpen?: boolean;
  /**
   * Search engine to use (default: 'google')
   * NOTE(review): the stated default is not established by this file — confirm.
   */
  engine?: LocalBrowserSearchEngine;
  /**
   * Whether each result link should be visited to extract its page content
   * @default false
   */
  needVisitedUrls?: boolean;
}
52 | 
/**
 * Construction-time configuration for the browser search.
 */
export interface BrowserSearchConfig {
  /**
   * Logger
   */
  logger?: Logger;
  /**
   * Custom browser
   */
  browser?: BrowserInterface;
  /**
   * Custom browser options
   */
  browserOptions?: LaunchOptions;
  /**
   * Set default search engine
   *
   * NOTE(review): this comment used to state `@default {'github'}`, which is not a
   * member of LocalBrowserSearchEngine; confirm the real default where this config is consumed.
   */
  defaultEngine?: LocalBrowserSearchEngine;
}
73 | 
/**
 * Contract every search engine implementation fulfils: building the search URL,
 * extracting results from the loaded page and, optionally, waiting for results.
 */
export interface SearchEngineAdapter {
  /**
   * Get search URL for the specific engine
   * @param query - Search query
   * @param options - Result count and domains to exclude
   * @returns URL of the search results page
   */
  getSearchUrl(
    query: string,
    options: {
      count?: number;
      excludeDomains?: string[];
    },
  ): string;

  /**
   * Extract search results from the page.
   * Passed as a page function, so it runs in the page context rather than in Node.js.
   * @param window - Window object of the search results page
   */
  extractSearchResults(window: Window): SearchResult[];

  /**
   * Wait for search results to load (optional hook, invoked after the page has loaded)
   * @param page - Page showing the search results
   * @param timeout - Maximum wait; NOTE(review): presumably milliseconds — confirm.
   */
  waitForSearchResults?(page: Page, timeout?: number): Promise<void>;
}


--------------------------------------------------------------------------------
/src/libs/browser-search/utils.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The following code is based on
  3 |  * https://github.com/bytedance/UI-TARS-desktop/tree/main/packages/agent-infra/search/browser-search
  4 |  * 
  5 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  6 |  * SPDX-License-Identifier: Apache-2.0
  7 |  */
  8 | import Turndown from 'turndown';
  9 | import { gfm } from 'turndown-plugin-gfm';
 10 | import { defaultLogger as logger } from '@agent-infra/logger';
 11 | import { Page } from '../browser/index.js';
 12 | import UserAgent from 'user-agents';
 13 | 
 14 | /**
 15 |  * Safely parses a URL string into a URL object
 16 |  * @param url - The URL string to parse
 17 |  * @returns URL object or null if invalid
 18 |  */
 19 | const parseUrl = (url: string) => {
 20 |   try {
 21 |     return new URL(url);
 22 |   } catch {
 23 |     return null;
 24 |   }
 25 | };
 26 | 
 27 | /**
 28 |  * Determines if a domain should be skipped based on a blocklist
 29 |  * @param url - The URL to check
 30 |  * @returns True if the domain should be skipped, false otherwise
 31 |  */
 32 | export const shouldSkipDomain = (url: string) => {
 33 |   const parsed = parseUrl(url);
 34 |   if (!parsed) return true;
 35 | 
 36 |   const { hostname } = parsed;
 37 |   return [
 38 |     'reddit.com',
 39 |     'www.reddit.com',
 40 |     'x.com',
 41 |     'twitter.com',
 42 |     'www.twitter.com',
 43 |     'youtube.com',
 44 |     'www.youtube.com',
 45 |   ].includes(hostname);
 46 | };
 47 | 
 48 | /**
 49 |  * Applies various stealth techniques to make the browser appear more like a regular user browser
 50 |  * @param page - Puppeteer page object
 51 |  */
 52 | export async function applyStealthScripts(page: Page) {
 53 |   const userAgent = new UserAgent({
 54 |     deviceCategory: 'desktop',
 55 |   }).toString();
 56 |   await page.setBypassCSP(true);
 57 |   await page.setUserAgent(userAgent);
 58 | 
 59 |   /**
 60 |    * https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html
 61 |    */
 62 |   await page.evaluate(() => {
 63 |     /**
 64 |      * Override the navigator.webdriver property
 65 |      * The webdriver read-only property of the navigator interface indicates whether the user agent is controlled by automation.
 66 |      * @see https://developer.mozilla.org/en-US/docs/Web/API/Navigator/webdriver
 67 |      */
 68 |     Object.defineProperty(navigator, 'webdriver', {
 69 |       get: () => undefined,
 70 |     });
 71 | 
 72 |     // Mock languages and plugins to mimic a real browser
 73 |     Object.defineProperty(navigator, 'languages', {
 74 |       get: () => ['en-US', 'en'],
 75 |     });
 76 | 
 77 |     Object.defineProperty(navigator, 'plugins', {
 78 |       get: () => [{}, {}, {}, {}, {}],
 79 |     });
 80 | 
 81 |     // Redefine the headless property
 82 |     Object.defineProperty(navigator, 'headless', {
 83 |       get: () => false,
 84 |     });
 85 | 
 86 |     // Override the permissions API
 87 |     const originalQuery = window.navigator.permissions.query;
 88 |     window.navigator.permissions.query = (parameters) =>
 89 |       parameters.name === 'notifications'
 90 |         ? Promise.resolve({
 91 |           state: Notification.permission,
 92 |         } as PermissionStatus)
 93 |         : originalQuery(parameters);
 94 |   });
 95 | }
 96 | 
 97 | /**
 98 |  * Sets up request interception to block unnecessary resources and apply stealth techniques
 99 |  * @param page - Puppeteer page object
100 |  */
101 | export async function interceptRequest(page: Page) {
102 |   await applyStealthScripts(page);
103 |   await page.setRequestInterception(true);
104 | 
105 |   page.on('request', (request) => {
106 |     const resourceType = request.resourceType();
107 | 
108 |     if (resourceType !== 'document') {
109 |       return request.abort();
110 |     }
111 | 
112 |     if (request.isNavigationRequest()) {
113 |       return request.continue();
114 |     }
115 | 
116 |     return request.abort();
117 |   });
118 | }
119 | 
/**
 * Interface representing extracted page information,
 * as returned by `extractPageInformation`
 */
interface PageInfo {
  /** Page title (article title when available, otherwise the document title) */
  title: string;
  /** Page content in HTML format; empty string when no article could be extracted */
  content: string;
}
129 | 
/**
 * !NOTE: This function is serialized and executed inside the browser page, not in Node.js,
 * so it must stay self-contained.
 *
 * Evaluates the Readability source, strips non-content elements from the
 * document and returns the article that Readability extracts from it.
 * @param window Browser window object
 * @param readabilityScript Source code of the Readability library
 * @returns Title and HTML content of the page
 */
export function extractPageInformation(
  window: Window,
  readabilityScript: string,
): PageInfo {
  // Evaluate the library source as a CommonJS-style module to obtain its export.
  const loadModule = new Function(
    'module',
    `${readabilityScript}\nreturn module.exports`,
  );
  const Readability = loadModule({});

  const doc = window.document;

  // Strip elements that carry no readable content before parsing.
  const noise = doc.querySelectorAll(
    'script,noscript,style,link,svg,img,video,iframe,canvas,.reflist',
  );
  noise.forEach((node) => {
    node.remove();
  });

  const parsed = new Readability(doc).parse();
  const fallbackTitle = doc.title;

  return {
    content: parsed?.content || '',
    title: parsed?.title || fallbackTitle,
  };
}
166 | 
/**
 * Options for `toMarkdown`: all Turndown options plus a switch for the GFM plugin.
 */
export interface ToMarkdownOptions extends Turndown.Options {
  /** Whether to enable the GitHub Flavored Markdown plugin */
  gfmExtension?: boolean;
}
170 | 
/**
 * Convert HTML content to Markdown format
 *
 * Every Turndown option supplied by the caller is forwarded to Turndown.
 * `codeBlockStyle`, `headingStyle`, `emDelimiter` and `strongDelimiter` fall back
 * to 'fenced', 'atx', '*' and '**' when omitted.
 *
 * @param html HTML string
 * @param options Conversion options (Turndown options plus `gfmExtension`, default true)
 * @returns Markdown string; '' for empty input; the original HTML if the conversion throws
 */
export function toMarkdown(
  html: string,
  options: ToMarkdownOptions = {},
): string {
  if (!html) return '';

  try {
    const {
      codeBlockStyle = 'fenced',
      headingStyle = 'atx',
      emDelimiter = '*',
      strongDelimiter = '**',
      gfmExtension = true,
      ...otherTurndownOptions
    } = options;

    // Spread the remaining options first so the four defaulted values above
    // always win over an explicitly passed `undefined`.
    const turndown = new Turndown({
      ...otherTurndownOptions,
      codeBlockStyle,
      headingStyle,
      emDelimiter,
      strongDelimiter,
    });

    if (gfmExtension) {
      turndown.use(gfm);
    }

    return turndown.turndown(html);
  } catch (error) {
    // Best effort: hand back the raw HTML rather than failing the caller
    logger.error('Error converting HTML to Markdown:', error);
    return html;
  }
}
209 | 


--------------------------------------------------------------------------------
/src/libs/browser/base.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The following code is based on
  3 |  * https://github.com/bytedance/UI-TARS-desktop/tree/main/packages/agent-infra/browser
  4 |  * 
  5 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  6 |  * SPDX-License-Identifier: Apache-2.0
  7 |  */
  8 | import * as puppeteer from 'puppeteer-core';
  9 | import { Logger, defaultLogger } from '@agent-infra/logger';
 10 | import {
 11 |   BrowserInterface,
 12 |   EvaluateOnNewPageOptions,
 13 |   LaunchOptions,
 14 |   Page,
 15 | } from './types.js';
 16 | 
/**
 * Configuration options for the BaseBrowser class
 * @interface BaseBrowserOptions
 * @property {Logger} [logger] - Custom logger instance to use for browser logging;
 *   the default logger is used when omitted
 */
export interface BaseBrowserOptions {
  logger?: Logger;
}
 25 | 
 26 | /**
 27 |  * Abstract base class that implements common browser automation functionality
 28 |  * Provides a foundation for specific browser implementations with shared capabilities
 29 |  * @abstract
 30 |  * @implements {BrowserInterface}
 31 |  */
 32 | export abstract class BaseBrowser implements BrowserInterface {
 33 |   /**
 34 |    * The underlying Puppeteer browser instance
 35 |    * @protected
 36 |    */
 37 |   protected browser: puppeteer.Browser | null = null;
 38 | 
 39 |   /**
 40 |    * Logger instance for browser-related logging
 41 |    * @protected
 42 |    */
 43 |   protected logger: Logger;
 44 | 
 45 |   /**
 46 |    * Reference to the currently active browser page
 47 |    * @protected
 48 |    */
 49 |   protected activePage: Page | null = null;
 50 | 
 51 |   /**
 52 |    * Creates an instance of BaseBrowser
 53 |    * @param {BaseBrowserOptions} [options] - Configuration options
 54 |    */
 55 |   constructor(options?: BaseBrowserOptions) {
 56 |     this.logger = options?.logger ?? defaultLogger;
 57 |     this.logger.info('Browser Options:', options);
 58 |   }
 59 | 
 60 |   /**
 61 |    * Get the underlying Puppeteer browser instance
 62 |    * @throws Error if browser is not launched
 63 | 
 64 |    * @returns {puppeteer.Browser} Puppeteer browser instance
 65 |    */
 66 |   getBrowser(): puppeteer.Browser {
 67 |     if (!this.browser) {
 68 |       throw new Error('Browser not launched');
 69 |     }
 70 |     return this.browser;
 71 |   }
 72 | 
 73 |   /**
 74 |    * Sets up listeners for browser page events
 75 |    * Tracks page creation and updates active page reference
 76 |    * @protected
 77 |    */
 78 |   protected async setupPageListener() {
 79 |     if (!this.browser) return;
 80 | 
 81 |     this.browser.on('targetcreated', async (target) => {
 82 |       const page = await target.page();
 83 |       if (page) {
 84 |         this.logger.info('New page created:', await page.url());
 85 |         this.activePage = page;
 86 | 
 87 |         page.once('close', () => {
 88 |           if (this.activePage === page) {
 89 |             this.activePage = null;
 90 |           }
 91 |         });
 92 | 
 93 |         page.once('error', () => {
 94 |           if (this.activePage === page) {
 95 |             this.activePage = null;
 96 |           }
 97 |         });
 98 |       }
 99 |     });
100 |   }
101 | 
102 |   /**
103 |    * Launches the browser with specified options
104 |    * @abstract
105 |    * @param {LaunchOptions} [options] - Browser launch configuration options
106 |    * @returns {Promise<void>} Promise that resolves when browser is launched
107 |    */
108 |   abstract launch(options?: LaunchOptions): Promise<void>;
109 | 
110 |   /**
111 |    * Closes the browser instance and cleans up resources
112 |    * @returns {Promise<void>} Promise that resolves when browser is closed
113 |    * @throws {Error} If browser fails to close properly
114 |    */
115 |   async close(): Promise<void> {
116 |     this.logger.info('Closing browser');
117 |     try {
118 |       await this.browser?.close();
119 |       this.browser = null;
120 |       this.logger.success('Browser closed successfully');
121 |     } catch (error) {
122 |       this.logger.error('Failed to close browser:', error);
123 |       throw error;
124 |     }
125 |   }
126 | 
127 |   /**
128 |    * Creates a new page, navigates to the specified URL, executes a function in the page context, and returns the result
129 |    * This method is inspired and modified from https://github.com/egoist/local-web-search/blob/04608ed09aa103e2fff6402c72ca12edfb692d19/src/browser.ts#L74
130 |    * @template T - Type of parameters passed to the page function
131 |    * @template R - Return type of the page function
132 |    * @param {EvaluateOnNewPageOptions<T, R>} options - Configuration options for the page evaluation
133 |    * @returns {Promise<R | null>} Promise resolving to the result of the page function or null
134 |    * @throws {Error} If page creation or evaluation fails
135 |    */
136 |   async evaluateOnNewPage<T extends any[], R>(
137 |     options: EvaluateOnNewPageOptions<T, R>,
138 |   ): Promise<R | null> {
139 |     const {
140 |       url,
141 |       pageFunction,
142 |       pageFunctionParams,
143 |       beforePageLoad,
144 |       afterPageLoad,
145 |       beforeSendResult,
146 |       waitForOptions,
147 |     } = options;
148 |     const page = await this.browser!.newPage();
149 |     try {
150 |       await beforePageLoad?.(page);
151 |       await page.goto(url, {
152 |         waitUntil: 'networkidle2',
153 |         ...waitForOptions,
154 |       });
155 |       await afterPageLoad?.(page);
156 |       const _window = await page.evaluateHandle(() => window);
157 |       const result = await page.evaluate(
158 |         pageFunction,
159 |         _window,
160 |         ...pageFunctionParams,
161 |       );
162 |       await beforeSendResult?.(page, result);
163 |       await _window.dispose();
164 |       await page.close();
165 |       return result;
166 |     } catch (error) {
167 |       await page.close();
168 |       throw error;
169 |     }
170 |   }
171 | 
172 |   /**
173 |    * Creates a new browser page
174 |    * @returns {Promise<Page>} Promise resolving to the newly created page
175 |    * @throws {Error} If browser is not launched or page creation fails
176 |    */
177 |   async createPage(): Promise<Page> {
178 |     if (!this.browser) {
179 |       this.logger.error('No active browser');
180 |       throw new Error('Browser not launched');
181 |     }
182 |     const page = await this.browser.newPage();
183 |     return page;
184 |   }
185 | 
186 |   /**
187 |    * Gets the currently active page or finds an active page if none is currently tracked
188 |    * If no active pages exist, creates a new page
189 |    * @returns {Promise<Page>} Promise resolving to the active page
190 |    * @throws {Error} If browser is not launched or no active page can be found/created
191 |    */
192 |   async getActivePage(): Promise<Page> {
193 |     if (!this.browser) {
194 |       throw new Error('Browser not launched');
195 |     }
196 | 
197 |     // If activePage exists and is still available, return directly
198 |     if (this.activePage) {
199 |       try {
200 |         // Verify that the page is still available
201 |         await this.activePage.evaluate(() => document.readyState);
202 |         return this.activePage;
203 |       } catch (e) {
204 |         this.logger.warn('Active page no longer available:', e);
205 |         this.activePage = null;
206 |       }
207 |     }
208 | 
209 |     // Get all pages and find the last active page
210 |     const pages = await this.browser.pages();
211 | 
212 |     if (pages.length === 0) {
213 |       this.activePage = await this.createPage();
214 |       return this.activePage;
215 |     }
216 | 
217 |     // Find the last responding page
218 |     for (let i = pages.length - 1; i >= 0; i--) {
219 |       const page = pages[i];
220 |       try {
221 |         await page.evaluate(() => document.readyState);
222 |         this.activePage = page;
223 |         return page;
224 |       } catch (e) {
225 |         continue;
226 |       }
227 |     }
228 | 
229 |     throw new Error('No active page found');
230 |   }
231 | }


--------------------------------------------------------------------------------
/src/libs/browser/finder.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The following code is modified based on
  3 |  * https://github.com/egoist/local-web-search/blob/main/src/find-browser.ts
  4 |  * Copy from
  5 |  * https://github.com/bytedance/UI-TARS-desktop/blob/main/packages/agent-infra/browser/src/browser-finder.ts
  6 |  * 
  7 |  * MIT Licensed
  8 |  * Copyright (c) 2025 ChatWise (https://chatwise.app) <kevin@chatwise.app>
  9 |  * https://github.com/egoist/local-web-search/blob/main/LICENSE
 10 |  */
 11 | 
 12 | import * as fs from 'fs';
 13 | import * as path from 'path';
 14 | import * as os from 'os';
 15 | import { Logger, defaultLogger } from '@agent-infra/logger';
 16 | 
/**
 * Interface defining browser locations and configurations
 * Contains paths and settings for different operating systems.
 * The platform keys match the values of `process.platform` used for lookup.
 * @interface Browser
 */
interface Browser {
  /**
   * Browser name identifier (compared by exact match when looking up a browser by name)
   */
  name: string;

  /**
   * Executable paths by platform
   * @property {string} win32 - Windows executable path
   * @property {string} darwin - macOS executable path
   * @property {string} linux - Linux executable path
   */
  executable: {
    win32: string;
    darwin: string;
    linux: string;
  };

  /**
   * User data directory paths by platform
   * @property {string} win32 - Windows user data directory
   * @property {string} darwin - macOS user data directory
   * @property {string} linux - Linux user data directory
   */
  userDataDir: {
    win32: string;
    darwin: string;
    linux: string;
  };
}
 52 | 
 53 | /**
 54 |  * Class responsible for finding and managing browser installations
 55 |  * Detects installed browsers and their profiles across different platforms
 56 |  */
 57 | export class BrowserFinder {
 58 |   /**
 59 |    * Logger instance for diagnostic output
 60 |    */
 61 |   private logger: Logger;
 62 | 
 63 |   /**
 64 |    * Creates a new BrowserFinder instance
 65 |    * @param {Logger} [logger] - Optional custom logger
 66 |    */
 67 |   constructor(logger?: Logger) {
 68 |     this.logger = logger ?? defaultLogger;
 69 |   }
 70 | 
 71 |   /**
 72 |    * Getter that returns the list of supported browsers with their platform-specific paths
 73 |    * @returns {Browser[]} Array of browser configurations
 74 |    * @private
 75 |    */
 76 |   private get browsers(): Browser[] {
 77 |     // Get HOME_DIR inside the getter to ensure it's always current
 78 |     const HOME_DIR = os.homedir();
 79 |     const LOCAL_APP_DATA = process.env.LOCALAPPDATA;
 80 | 
 81 |     return [
 82 |       {
 83 |         name: 'Chromium',
 84 |         executable: {
 85 |           win32: 'C:\\Program Files\\Chromium\\Application\\chrome.exe',
 86 |           darwin: '/Applications/Chromium.app/Contents/MacOS/Chromium',
 87 |           linux: '/usr/bin/chromium',
 88 |         },
 89 |         userDataDir: {
 90 |           win32: `${LOCAL_APP_DATA}\\Chromium\\User Data`,
 91 |           darwin: `${HOME_DIR}/Library/Application Support/Chromium`,
 92 |           linux: `${HOME_DIR}/.config/chromium`,
 93 |         },
 94 |       },
 95 |       {
 96 |         name: 'Google Chrome',
 97 |         executable: {
 98 |           win32: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
 99 |           darwin:
100 |             '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
101 |           linux: '/usr/bin/google-chrome',
102 |         },
103 |         userDataDir: {
104 |           win32: `${LOCAL_APP_DATA}\\Google\\Chrome\\User Data`,
105 |           darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome`,
106 |           linux: `${HOME_DIR}/.config/google-chrome`,
107 |         },
108 |       },
109 |       {
110 |         name: 'Google Chrome Canary',
111 |         executable: {
112 |           win32:
113 |             'C:\\Program Files\\Google\\Chrome Canary\\Application\\chrome.exe',
114 |           darwin:
115 |             '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
116 |           linux: '/usr/bin/google-chrome-canary',
117 |         },
118 |         userDataDir: {
119 |           win32: `${LOCAL_APP_DATA}\\Google\\Chrome Canary\\User Data`,
120 |           darwin: `${HOME_DIR}/Library/Application Support/Google/Chrome Canary`,
121 |           linux: `${HOME_DIR}/.config/google-chrome-canary`,
122 |         },
123 |       },
124 |     ];
125 |   }
126 | 
127 |   /**
128 |    * Find a specific browser or the first available browser
129 |    * @param {string} [name] - Optional browser name to find
130 |    * @returns {{ executable: string; userDataDir: string }} Browser executable and user data paths
131 |    * @throws {Error} If no supported browser is found or the platform is unsupported
132 |    */
133 |   findBrowser(name?: string): {
134 |     executable: string;
135 |     userDataDir: string;
136 |   } {
137 |     const platform = process.platform;
138 |     this.logger.info('Finding browser on platform:', platform);
139 | 
140 |     if (platform !== 'darwin' && platform !== 'win32' && platform !== 'linux') {
141 |       const error = new Error(`Unsupported platform: ${platform}`);
142 |       this.logger.error(error.message);
143 |       throw error;
144 |     }
145 | 
146 |     const browser = name
147 |       ? this.browsers.find(
148 |         (b) => b.name === name && fs.existsSync(b.executable[platform]),
149 |       )
150 |       : this.browsers.find((b) => fs.existsSync(b.executable[platform]));
151 | 
152 |     this.logger.log('browser', browser);
153 | 
154 |     if (!browser) {
155 |       const error = name
156 |         ? new Error(`Cannot find browser: ${name}`)
157 |         : new Error(
158 |           'Cannot find a supported browser on your system. Please install Chrome, Edge, or Brave.',
159 |         );
160 |       this.logger.error(error.message);
161 |       throw error;
162 |     }
163 | 
164 |     const result = {
165 |       executable: browser.executable[platform],
166 |       userDataDir: browser.userDataDir[platform],
167 |     };
168 | 
169 |     this.logger.success(`Found browser: ${browser.name}`);
170 |     this.logger.info('Browser details:', result);
171 | 
172 |     return result;
173 |   }
174 | 
175 |   /**
176 |    * Get browser profiles for a specific browser
177 |    * Reads the Local State file to extract profile information
178 |    * @param {string} [browserName] - Optional browser name to get profiles for
179 |    * @returns {Array<{ displayName: string; path: string }>} Array of profile objects with display names and paths
180 |    */
181 |   getBrowserProfiles(
182 |     browserName?: string,
183 |   ): Array<{ displayName: string; path: string }> {
184 |     const browser = this.findBrowser(browserName);
185 | 
186 |     try {
187 |       const localState = JSON.parse(
188 |         fs.readFileSync(path.join(browser.userDataDir, 'Local State'), 'utf8'),
189 |       );
190 |       const profileInfo = localState.profile.info_cache;
191 | 
192 |       return Object.entries(profileInfo).map(
193 |         ([profileName, info]: [string, any]) => ({
194 |           displayName: info.name,
195 |           path: path.join(browser.userDataDir, profileName),
196 |         }),
197 |       );
198 |     } catch (error) {
199 |       return [];
200 |     }
201 |   }
202 | 
203 |   /**
204 |    * Legacy method for backwards compatibility
205 |    * Finds Chrome browser executable path
206 |    * @deprecated Use findBrowser instead
207 |    * @returns {string | null} Chrome executable path or null if not found
208 |    */
209 |   findChrome(): string | null {
210 |     try {
211 |       const { executable } = this.findBrowser('Google Chrome');
212 |       return executable;
213 |     } catch {
214 |       return null;
215 |     }
216 |   }
217 | }
218 | 


--------------------------------------------------------------------------------
/src/libs/browser/index.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The following code is based on
 3 |  * https://github.com/bytedance/UI-TARS-desktop/tree/main/packages/agent-infra/browser
 4 |  * 
 5 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 6 |  * SPDX-License-Identifier: Apache-2.0
 7 |  */
 8 | 
 9 | /**
10 |  * @agent-infra/browser
11 |  * A browser automation library based on puppeteer-core
12 |  *
13 |  * Main exports:
14 |  * - types: Type definitions for browser interfaces
15 |  * - BrowserFinder: Utility to detect and locate installed browsers
16 |  * - LocalBrowser: Control locally installed browsers
17 |  * - RemoteBrowser: Connect to remote browser instances
18 |  * - BaseBrowser: Abstract base class for browser implementations
19 |  */
20 | export * from './types.js';
21 | export * from './finder.js';
22 | export * from './base.js';
23 | export * from './local.js';
24 | export * from './remote.js';


--------------------------------------------------------------------------------
/src/libs/browser/local.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 3 |  * SPDX-License-Identifier: Apache-2.0
 4 |  */
 5 | import * as puppeteer from 'puppeteer-core';
 6 | import { LaunchOptions } from './types.js';
 7 | import { BrowserFinder } from './finder.js';
 8 | import { BaseBrowser } from './base.js';
 9 | 
/**
 * LocalBrowser class for controlling locally installed browsers
 * Extends the BaseBrowser with functionality specific to managing local browser instances
 * @extends BaseBrowser
 */
export class LocalBrowser extends BaseBrowser {
  /**
   * Browser finder instance to detect and locate installed browsers
   * Shares this instance's logger so that a custom logger also receives the
   * discovery output (field initializers run after the base constructor has
   * assigned `this.logger`).
   * @private
   */
  private browserFinder = new BrowserFinder(this.logger);

  /**
   * Launches a local browser instance with specified options
   * Automatically detects installed browsers if no executable path is provided
   * @param {LaunchOptions} options - Configuration options for launching the browser
   * @returns {Promise<void>} Promise that resolves when the browser is successfully launched
   * @throws {Error} If no browser can be found or the browser cannot be launched
   */
  async launch(options: LaunchOptions = {}): Promise<void> {
    this.logger.info('Launching browser with options:', options);

    const executablePath =
      options?.executablePath || this.browserFinder.findBrowser().executable;

    this.logger.info('Using executable path:', executablePath);

    // Viewport defaults to 1280x800 when not specified
    const viewportWidth = options?.defaultViewport?.width ?? 1280;
    const viewportHeight = options?.defaultViewport?.height ?? 800;

    const puppeteerLaunchOptions: puppeteer.LaunchOptions = {
      executablePath,
      headless: options?.headless ?? false,
      defaultViewport: {
        width: viewportWidth,
        height: viewportHeight,
      },
      // NOTE(review): --no-sandbox and --disable-web-security weaken browser
      // isolation; keep this browser away from untrusted interactive use.
      args: [
        '--no-sandbox',
        '--mute-audio',
        '--disable-gpu',
        '--disable-http2',
        '--disable-blink-features=AutomationControlled',
        '--disable-infobars',
        '--disable-background-timer-throttling',
        '--disable-popup-blocking',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-window-activation',
        '--disable-focus-on-load',
        '--no-default-browser-check', // disable default browser check
        '--disable-web-security', // disable CORS
        '--disable-features=IsolateOrigins,site-per-process',
        '--disable-site-isolation-trials',
        // The window is 90px taller than the viewport
        // (NOTE(review): presumably to leave room for the browser chrome — confirm)
        `--window-size=${viewportWidth},${viewportHeight + 90}`,
        options?.proxy ? `--proxy-server=${options.proxy}` : '',
        options?.profilePath
          ? `--profile-directory=${options.profilePath}`
          : '',
      ].filter(Boolean), // drop the empty placeholders of unset options
      ignoreDefaultArgs: ['--enable-automation'],
      timeout: options.timeout ?? 0,
      downloadBehavior: {
        policy: 'deny',
      },
    };

    this.logger.info('Launch options:', puppeteerLaunchOptions);

    try {
      this.browser = await puppeteer.launch(puppeteerLaunchOptions);
      await this.setupPageListener();
      this.logger.success('Browser launched successfully');
    } catch (error) {
      this.logger.error('Failed to launch browser:', error);
      throw error;
    }
  }
}


--------------------------------------------------------------------------------
/src/libs/browser/remote.ts:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 3 |  * SPDX-License-Identifier: Apache-2.0
 4 |  */
 5 | import * as puppeteer from 'puppeteer-core';
 6 | import { BaseBrowser, BaseBrowserOptions } from './base.js';
 7 | import { LaunchOptions } from './types.js';
 8 | 
 9 | /**
10 |  * Configuration options for RemoteBrowser
11 |  * @extends BaseBrowserOptions
12 |  * @interface RemoteBrowserOptions
13 |  * @property {string} [wsEndpoint] - WebSocket endpoint URL for direct connection
14 |  * @property {string} [host] - Remote host address (default: 'localhost')
15 |  * @property {number} [port] - Remote debugging port (default: 9222)
16 |  */
17 | export interface RemoteBrowserOptions extends BaseBrowserOptions {
18 |   wsEndpoint?: string;
19 |   host?: string;
20 |   port?: number;
21 | }
22 | 
23 | /**
24 |  * RemoteBrowser class for connecting to remote browser instances
25 |  *
26 |  * Currently, this RemoteBrowser is not production ready,
27 |  * mainly because it still relies on `puppeteer-core`,
28 |  * which can only run on Node.js.
29 |  *
30 |  * At the same time, Chrome instances built with
31 |  * `--remote-debugging-address` on Linux have security risks
32 |  *
33 |  * @see https://issues.chromium.org/issues/41487252
34 |  * @see https://issues.chromium.org/issues/40261787
35 |  * @see https://github.com/pyppeteer/pyppeteer/pull/379
36 |  * @see https://stackoverflow.com/questions/72760355/chrome-remote-debugging-not-working-computer-to-computer
37 |  *
38 |  * @extends BaseBrowser
39 |  */
40 | export class RemoteBrowser extends BaseBrowser {
41 |   /**
42 |    * Creates a new RemoteBrowser instance
43 |    * @param {RemoteBrowserOptions} [options] - Configuration options for remote browser connection
44 |    */
45 |   constructor(private options?: RemoteBrowserOptions) {
46 |     super(options);
47 |   }
48 | 
49 |   /**
50 |    * Connects to a remote browser instance using WebSocket.
51 |    *
52 |    * When no `wsEndpoint` was configured, the endpoint is discovered by
53 |    * requesting `http://<host>:<port>/json/version` and reading its
54 |    * `webSocketDebuggerUrl` field. Discovery runs inside the same try/catch as
55 |    * the connection, so a refused request, a non-2xx status or a missing field
56 |    * is logged and rethrown as a descriptive error instead of reaching
57 |    * `puppeteer.connect` as an undefined endpoint.
58 |    *
59 |    * @param {LaunchOptions} [options] - Launch configuration options; only `defaultViewport` is read here
60 |    * @returns {Promise<void>} Promise that resolves when connected to the remote browser
61 |    * @throws {Error} If endpoint discovery or the connection to the remote browser fails
62 |    */
63 |   async launch(options?: LaunchOptions): Promise<void> {
64 |     this.logger.info('Browser Launch options:', options);
65 | 
66 |     try {
67 |       let browserWSEndpoint = this.options?.wsEndpoint;
68 | 
69 |       if (!browserWSEndpoint) {
70 |         const host = this.options?.host || 'localhost';
71 |         const port = this.options?.port || 9222;
72 |         const versionUrl = `http://${host}:${port}/json/version`;
73 | 
74 |         const response = await fetch(versionUrl);
75 |         if (!response.ok) {
76 |           throw new Error(
77 |             `Failed to query ${versionUrl}: ${response.status} ${response.statusText}`,
78 |           );
79 |         }
80 | 
81 |         const version = (await response.json()) as {
82 |           webSocketDebuggerUrl?: string;
83 |         };
84 |         if (!version?.webSocketDebuggerUrl) {
85 |           throw new Error(`No webSocketDebuggerUrl returned by ${versionUrl}`);
86 |         }
87 |         browserWSEndpoint = version.webSocketDebuggerUrl;
88 |       }
89 | 
90 |       this.logger.info('Using WebSocket endpoint:', browserWSEndpoint);
91 | 
92 |       const puppeteerConnectOptions: puppeteer.ConnectOptions = {
93 |         browserWSEndpoint,
94 |         defaultViewport: options?.defaultViewport ?? { width: 1280, height: 800 },
95 |       };
96 | 
97 |       this.browser = await puppeteer.connect(puppeteerConnectOptions);
98 |       await this.setupPageListener();
99 |       this.logger.success('Connected to remote browser successfully');
100 |     } catch (error) {
101 |       this.logger.error('Failed to connect to remote browser:', error);
102 |       throw error;
103 |     }
104 |   }
105 | }


--------------------------------------------------------------------------------
/src/libs/browser/types.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The following code is based on
  3 |  * https://github.com/bytedance/UI-TARS-desktop/tree/main/packages/agent-infra/browser
  4 |  * 
  5 |  * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
  6 |  * SPDX-License-Identifier: Apache-2.0
  7 |  */
  8 | import { Page, WaitForOptions } from 'puppeteer-core';
  9 | 
 10 | /**
 11 |  * Options for launching a browser instance
 12 |  * @interface LaunchOptions
 13 |  */
 14 | export interface LaunchOptions {
 15 |   /**
 16 |    * Whether to run browser in headless mode
 17 |    * @default false
 18 |    */
 19 |   headless?: boolean;
 20 | 
 21 |   /**
 22 |    * Maximum time in milliseconds to wait for the browser to start
 23 |    * @default 0 (no timeout)
 24 |    */
 25 |   timeout?: number;
 26 | 
 27 |   /**
 28 |    * The viewport dimensions; the launch implementations fall back to 1280x800 when omitted
 29 |    * @property {number} width - Viewport width in pixels
 30 |    * @property {number} height - Viewport height in pixels
 31 |    */
 32 |   defaultViewport?: {
 33 |     width: number;
 34 |     height: number;
 35 |   };
 36 | 
 37 |   /**
 38 |    * Path to a browser executable to use instead of the automatically detected one
 39 |    * If not provided, the system will attempt to find an installed browser
 40 |    */
 41 |   executablePath?: string;
 42 | 
 43 |   /**
 44 |    * Browser profile to use (existing cookies, extensions, etc.); sent as `--profile-directory=<value>`
 45 |    * NOTE(review): Chrome may expect a profile directory name here rather than a full path — confirm
 46 |    */
 47 |   profilePath?: string;
 48 | 
 49 |   /**
 50 |    * Proxy server URL, e.g. 'http://proxy.example.com:8080'
 51 |    * Sent to the browser as `--proxy-server=<value>` to route traffic through the proxy
 52 |    */
 53 |   proxy?: string;
 54 | }
 55 | 
 56 | /**
 57 |  * Options for evaluating JavaScript in a new page
 58 |  * @template T - Array of parameters to pass to the page function
 59 |  * @template R - Return type of the page function
 60 |  * @interface EvaluateOnNewPageOptions
 61 |  */
 62 | export interface EvaluateOnNewPageOptions<T extends any[], R> {
 63 |   /**
 64 |    * URL to navigate to before evaluating the function
 65 |    * The page will load this URL before executing the pageFunction
 66 |    */
 67 |   url: string;
 68 | 
 69 |   /**
 70 |    * Options for waiting for the page to load
 71 |    */
 72 |   waitForOptions?: WaitForOptions;
 73 | 
 74 |   /**
 75 |    * Function to be evaluated in the page context
 76 |    * This function runs in the context of the browser page, not Node.js
 77 |    * @param {Window} window - The window object of the page
 78 |    * @param {...T} args - Additional arguments passed to the function
 79 |    * @returns {R} Result of the function execution
 80 |    */
 81 |   pageFunction: (window: Window, ...args: T) => R;
 82 | 
 83 |   /**
 84 |    * Parameters to pass to the page function
 85 |    * These values will be serialized and passed to the pageFunction
 86 |    */
 87 |   pageFunctionParams: T;
 88 | 
 89 |   /**
 90 |    * Optional function to execute before page navigation
 91 |    * Useful for setting up page configuration before loading the URL
 92 |    * @param {Page} page - Puppeteer page instance
 93 |    * @returns {void | Promise<void>}
 94 |    */
 95 |   beforePageLoad?: (page: Page) => void | Promise<void>;
 96 | 
 97 |   /**
 98 |    * Optional function to execute after page navigation
 99 |    * Useful for setting up page configuration after loading the URL
100 |    * @param {Page} page - Puppeteer page instance
101 |    * @returns {void | Promise<void>}
102 |    */
103 |   afterPageLoad?: (page: Page) => void | Promise<void>;
104 | 
105 |   /**
106 |    * Optional function to process the result before returning
107 |    * Can be used to transform or validate the result from page evaluation
108 |    * @param {Page} page - Puppeteer page instance
109 |    * @param {R} result - Result from page function evaluation
110 |    * @returns {R | Promise<R>} Processed result
111 |    */
112 |   beforeSendResult?: (page: Page, result: R) => R | Promise<R>;
113 | }
114 | 
115 | /**
116 |  * Core browser interface that all browser implementations must implement
117 |  * Defines the standard API for browser automation
118 |  * @interface BrowserInterface
119 |  */
120 | export interface BrowserInterface {
121 |   /**
122 |    * Launch a new browser instance
123 |    * @param {LaunchOptions} [options] - Launch configuration options
124 |    * @returns {Promise<void>} Promise resolving when browser is launched
125 |    */
126 |   launch(options?: LaunchOptions): Promise<void>;
127 | 
128 |   /**
129 |    * Close the browser instance and all its pages
130 |    * @returns {Promise<void>} Promise resolving when browser is closed
131 |    */
132 |   close(): Promise<void>;
133 | 
134 |   /**
135 |    * Create a new page in the browser
136 |    * @returns {Promise<Page>} Promise resolving to the new page instance
137 |    */
138 |   createPage(): Promise<Page>;
139 | 
140 |   /**
141 |    * Evaluate a function in a new page context
142 |    * Creates a new page, navigates to URL, executes function, and returns result
143 |    * @template T - Array of parameters to pass to the page function
144 |    * @template R - Return type of the page function
145 |    * @param {EvaluateOnNewPageOptions<T, R>} options - Evaluation options
146 |    * @returns {Promise<R | null>} Promise resolving to the function result or null
147 |    */
148 |   evaluateOnNewPage<T extends any[], R>(
149 |     options: EvaluateOnNewPageOptions<T, R>,
150 |   ): Promise<R | null>;
151 | 
152 |   /**
153 |    * Get the currently active page or create one if none exists
154 |    * @returns {Promise<Page>} Promise resolving to the active page instance
155 |    */
156 |   getActivePage(): Promise<Page>;
157 | }
158 | 
159 | export { Page };


--------------------------------------------------------------------------------
/src/search/bing.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Bing Search API
  3 |  */
  4 | import { ISearchRequestOptions, ISearchResponse } from '../interface.js';
  5 | 
  6 | 
  7 | /**
  8 |  * Options for performing a Bing search
  9 |  */
 10 | export interface BingSearchOptions {
 11 |   /**
 12 |    * Search query string
 13 |    */
 14 |   q: string;
 15 | 
 16 |   /**
 17 |    * Number of results to return
 18 |    */
 19 |   count?: number;
 20 | 
 21 |   /**
 22 |    * Result offset for pagination
 23 |    */
 24 |   offset?: number;
 25 | 
 26 |   /**
 27 |    * Market code (e.g., 'en-US')
 28 |    */
 29 |   mkt?: string;
 30 | 
 31 |   /**
 32 |    * Safe search filtering level
 33 |    */
 34 |   safeSearch?: 'Off' | 'Moderate' | 'Strict';
 35 | 
 36 |   /**
 37 |    * Bing API key
 38 |    */
 39 |   apiKey: string;
 40 | 
 41 |   /**
 42 |    * Bing Search API URL
 43 |    */
 44 |   apiUrl?: string;
 45 | 
 46 |   /**
 47 |    * Additional parameters supported by Bing Search API
 48 |    */
 49 |   [key: string]: any;
 50 | }
 51 | 
 52 | /**
 53 |  * Represents a web page result from Bing Search
 54 |  */
 55 | export interface BingSearchWebPage {
 56 |   /**
 57 |    * Title of the web page
 58 |    */
 59 |   name: string;
 60 | 
 61 |   /**
 62 |    * URL of the web page
 63 |    */
 64 |   url: string;
 65 | 
 66 |   /**
 67 |    * Text snippet from the web page
 68 |    */
 69 |   snippet: string;
 70 | 
 71 |   /**
 72 |    * Date the page was last crawled by Bing
 73 |    */
 74 |   dateLastCrawled?: string;
 75 | 
 76 |   /**
 77 |    * Display URL for the web page
 78 |    */
 79 |   displayUrl?: string;
 80 | 
 81 |   /**
 82 |    * Unique identifier for the result
 83 |    */
 84 |   id?: string;
 85 | 
 86 |   /**
 87 |    * Indicates if the content is family friendly
 88 |    */
 89 |   isFamilyFriendly?: boolean;
 90 | 
 91 |   /**
 92 |    * Indicates if the result is navigational
 93 |    */
 94 |   isNavigational?: boolean;
 95 | 
 96 |   /**
 97 |    * Language of the web page
 98 |    */
 99 |   language?: string;
100 | 
101 |   /**
102 |    * Indicates if caching should be disabled
103 |    */
104 |   noCache?: boolean;
105 | 
106 |   /**
107 |    * Name of the website
108 |    */
109 |   siteName?: string;
110 | 
111 |   /**
112 |    * URL to a thumbnail image
113 |    */
114 |   thumbnailUrl?: string;
115 | }
116 | 
117 | /**
118 |  * Represents an image result from Bing Search
119 |  */
120 | export interface BingSearchImage { // element type of BingSearchResponse.images.value
121 |   contentSize: string; // NOTE(review): string-typed, presumably includes a unit — confirm against Bing docs
122 |   contentUrl: string;
123 |   datePublished: string;
124 |   encodingFormat: string;
125 |   height: number;
126 |   width: number;
127 |   hostPageDisplayUrl: string;
128 |   hostPageUrl: string;
129 |   name: string;
130 |   thumbnail: { // dimensions only; the thumbnail URL is the separate `thumbnailUrl` field
131 |     height: number;
132 |     width: number;
133 |   };
134 |   thumbnailUrl: string;
135 |   webSearchUrl: string;
136 | }
137 | 
138 | /**
139 |  * Represents a video result from Bing Search
140 |  */
141 | export interface BingSearchVideo { // element type of BingSearchResponse.videos.value
142 |   allowHttpsEmbed: boolean;
143 |   allowMobileEmbed: boolean;
144 |   contentUrl: string;
145 |   creator?: { // optional; only a name is modelled
146 |     name: string;
147 |   };
148 |   datePublished: string;
149 |   description: string;
150 |   duration: string; // NOTE(review): string-typed, presumably an ISO 8601 duration — confirm
151 |   embedHtml: string;
152 |   encodingFormat: string;
153 |   height: number;
154 |   width: number;
155 |   hostPageDisplayUrl: string;
156 |   hostPageUrl: string;
157 |   name: string;
158 |   publisher?: { // optional list; only a name is modelled per entry
159 |     name: string;
160 |   }[];
161 |   thumbnail: { // dimensions only; the thumbnail URL is the separate `thumbnailUrl` field
162 |     height: number;
163 |     width: number;
164 |   };
165 |   thumbnailUrl: string;
166 |   viewCount?: number;
167 |   webSearchUrl: string;
168 | }
169 | 
170 | export interface BingSearchResponse { // Bing search response body; every section is optional
171 |   _type?: string;
172 |   queryContext?: {
173 |     originalQuery: string;
174 |   };
175 |   webPages?: { // the only section bingSearch reads (via `webPages.value`)
176 |     value: BingSearchWebPage[];
177 |     totalEstimatedMatches?: number;
178 |     someResultsRemoved?: boolean;
179 |     webSearchUrl?: string;
180 |   };
181 |   images?: { // typed here but not read by bingSearch
182 |     value: BingSearchImage[];
183 |     isFamilyFriendly?: boolean;
184 |     readLink?: string;
185 |     webSearchUrl?: string;
186 |     id?: string;
187 |   };
188 |   videos?: { // typed here but not read by bingSearch
189 |     value: BingSearchVideo[];
190 |     isFamilyFriendly?: boolean;
191 |     readLink?: string;
192 |     webSearchUrl?: string;
193 |     id?: string;
194 |     scenario?: string;
195 |   };
196 |   rankingResponse?: { // typed here but not read by bingSearch
197 |     mainline?: {
198 |       items: {
199 |         answerType: string;
200 |         resultIndex?: number;
201 |         value: {
202 |           id: string;
203 |         };
204 |       }[];
205 |     };
206 |   };
207 |   [key: string]: any; // Allow other response fields
208 | }
209 | 
210 | /**
211 |  * Query the Bing Web Search API and map its web-page hits to the common
212 |  * search-result shape.
213 |  *
214 |  * @param options - Search request. `query` and `apiKey` are read, plus the
215 |  *   optional `limit` (default 10), `page` (default 1), `safeSearch`
216 |  *   (index into Off | Moderate | Strict, default 0), `language` (sent as
217 |  *   `mkt`) and `apiUrl` (defaults to the public v7.0 endpoint).
218 |  * @returns `{ results, success: true }`; `results` is empty when the response
219 |  *   carries no `webPages.value`.
220 |  * @throws {Error} When `apiKey` is missing, the HTTP status is not OK, or the
221 |  *   request / JSON parsing fails (the original error is rethrown).
222 |  */
223 | export async function bingSearch(options: ISearchRequestOptions): Promise<ISearchResponse> {
224 |   const { query, limit = 10, safeSearch = 0, page = 1, apiUrl = 'https://api.bing.microsoft.com/v7.0/search', apiKey, language } = options;
225 | 
226 |   const bingSafeSearchOptions = ['Off', 'Moderate', 'Strict'];
227 | 
228 |   if (!apiKey) {
229 |     throw new Error('Bing API key is required');
230 |   }
231 | 
232 |   const searchOptions = {
233 |     q: query,
234 |     count: limit,
235 |     offset: (page - 1) * limit,
236 |     mkt: language,
237 |     safeSearch: bingSafeSearchOptions[safeSearch] as 'Off' | 'Moderate' | 'Strict',
238 |   };
239 | 
240 |   try {
241 |     const queryParams = new URLSearchParams();
242 |     Object.entries(searchOptions).forEach(([key, value]) => {
243 |       // Skip unset parameters; `null` is skipped too so String() never
244 |       // serialises it as the literal text "null".
245 |       if (value !== undefined && value !== null) {
246 |         queryParams.set(key, String(value));
247 |       }
248 |     });
249 | 
250 |     const res = await fetch(`${apiUrl}?${queryParams}`, {
251 |       method: 'GET',
252 |       headers: {
253 |         'Content-Type': 'application/json',
254 |         'Ocp-Apim-Subscription-Key': apiKey,
255 |       },
256 |     });
257 | 
258 |     if (!res.ok) {
259 |       throw new Error(`Bing search error: ${res.status} ${res.statusText}`);
260 |     }
261 | 
262 |     const data = await res.json();
263 |     const serp = data.webPages?.value as Array<BingSearchWebPage>;
264 |     const results = serp?.map((item: BingSearchWebPage) => ({
265 |       title: item.name,
266 |       snippet: item.snippet,
267 |       url: item.url,
268 |       source: item.siteName,
269 |       thumbnailUrl: item.thumbnailUrl,
270 |       language: item.language,
271 |       image: null,
272 |       video: null,
273 |       engine: 'bing',
274 |     })) ?? [];
275 | 
276 |     return {
277 |       results,
278 |       success: true,
279 |     };
280 |   } catch (err: unknown) {
281 |     const msg = err instanceof Error ? err.message : 'Bing search error.';
282 |     // Diagnostics go to stderr: on an MCP stdio transport stdout carries the
283 |     // JSON-RPC stream, and stray text there corrupts the protocol.
284 |     process.stderr.write(`${msg}\n`);
285 |     throw err;
286 |   }
287 | }


--------------------------------------------------------------------------------
/src/search/duckduckgo.ts:
--------------------------------------------------------------------------------
 1 | import * as DDG from 'duck-duck-scrape';
 2 | import asyncRetry from 'async-retry';
 3 | import type { SearchOptions } from 'duck-duck-scrape';
 4 | import { ISearchRequestOptions, ISearchResponse } from '../interface.js';
 5 | 
 6 | 
 7 | /**
 8 |  * Search DuckDuckGo through `duck-duck-scrape`, retrying failed attempts with
 9 |  * `async-retry`, and map the hits to the common search-result shape.
10 |  *
11 |  * @param options - Search request merged with duck-duck-scrape options.
12 |  *   `timeout` (default 10000) is passed as the needle `response_timeout`,
13 |  *   `retry` (default `{ retries: 3 }`) is passed to `async-retry`, and an
14 |  *   explicit positive `limit` caps the number of returned results.
15 |  * @returns `{ results, success: true }`; `results` is empty when the library
16 |  *   returns nothing.
17 |  * @throws Rethrows the last error once the retries are exhausted.
18 |  */
19 | export async function duckDuckGoSearch(options: Omit<ISearchRequestOptions, 'safeSearch'> & SearchOptions): Promise<ISearchResponse> {
20 |   try {
21 |     const { query, timeout = 10000, safeSearch = DDG.SafeSearchType.OFF, retry = { retries: 3 }, ...searchOptions } = options;
22 | 
23 |     const res = await asyncRetry(
24 |       () => {
25 |         return DDG.search(query, {
26 |           ...searchOptions,
27 |           safeSearch,
28 |         }, {
29 |           // needle options
30 |           response_timeout: timeout,
31 |         });
32 |       },
33 |       retry,
34 |     );
35 | 
36 |     const hits = res?.results ?? [];
37 | 
38 |     // Honour an explicit `limit` like the other providers do; when it is
39 |     // absent or not a positive number every hit is returned, as before.
40 |     const max = Number(options.limit);
41 |     const limited = Number.isFinite(max) && max > 0 ? hits.slice(0, max) : hits;
42 | 
43 |     return {
44 |       results: limited.map((result) => ({
45 |         title: result.title,
46 |         snippet: result.description,
47 |         url: result.url,
48 |         source: result.hostname,
49 |         image: null,
50 |         video: null,
51 |         engine: 'duckduckgo',
52 |       })),
53 |       success: true,
54 |     };
55 |   } catch (error) {
56 |     const msg = error instanceof Error ? error.message : 'DuckDuckGo search error.';
57 |     // Diagnostics go to stderr: on an MCP stdio transport stdout carries the
58 |     // JSON-RPC stream, and stray text there corrupts the protocol.
59 |     process.stderr.write(`${msg}\n`);
60 |     throw error;
61 |   }
62 | }
52 | 


--------------------------------------------------------------------------------
/src/search/index.ts:
--------------------------------------------------------------------------------
1 | export * from './bing.js';
2 | export * from './duckduckgo.js';
3 | export * from './searxng.js';
4 | export * from './tavily.js';
5 | export * from './local.js';


--------------------------------------------------------------------------------
/src/search/local.ts:
--------------------------------------------------------------------------------
 1 | import { ISearchRequestOptions, ISearchResponse, ISearchResponseResult } from '../interface.js';
 2 | import { BrowserSearch, LocalBrowserSearchEngine } from '../libs/browser-search/index.js';
 3 | import { ConsoleLogger } from '@agent-infra/logger';
 4 | 
 5 | const logger = new ConsoleLogger('[LocalSearch]');
 6 | 
 7 | /**
 8 |  * Search with a locally launched headless browser, trying each requested
 9 |  * engine in order and returning the results of the first one that yields any.
10 |  *
11 |  * @param options - Search request. `query` is required; `limit` defaults to
12 |  *   10; `engines` is a comma-separated list, where `'all'` (the default)
13 |  *   expands to `bing,google,baidu,sogou`.
14 |  * @returns `{ results, success: true }`; `results` is empty when no engine
15 |  *   produced a hit.
16 |  * @throws {Error} When `engines` contains no usable engine name, or when a
17 |  *   browser search fails (the original error is rethrown).
18 |  */
19 | export async function localSearch(options: ISearchRequestOptions): Promise<ISearchResponse> {
20 |   const { query, limit = 10 } = options;
21 |   let { engines = 'all' } = options;
22 |   const browserSearch = new BrowserSearch({
23 |     logger,
24 |     browserOptions: {
25 |       headless: true,
26 |     },
27 |   });
28 | 
29 |   if (engines === 'all') {
30 |     engines = 'bing,google,baidu,sogou';
31 |   }
32 | 
33 |   try {
34 |     // `split` never returns an empty array ('' -> ['']), so blank entries are
35 |     // dropped first; otherwise the guard below could never trigger and an
36 |     // empty engine name would be handed to the browser search.
37 |     const engineList = engines
38 |       .split(',')
39 |       .map((name) => name.trim())
40 |       .filter(Boolean);
41 | 
42 |     if (engineList.length === 0) {
43 |       throw new Error('engines is required');
44 |     }
45 | 
46 |     const results: ISearchResponseResult[] = [];
47 | 
48 |     for (const engine of engineList) {
49 |       const res = await browserSearch.perform({
50 |         query,
51 |         count: limit,
52 |         engine: engine as LocalBrowserSearchEngine,
53 |         needVisitedUrls: false,
54 |       });
55 | 
56 |       // First engine with any hit wins; later engines are only fallbacks.
57 |       if (res.length > 0) {
58 |         results.push(...res);
59 |         break;
60 |       }
61 |     }
62 | 
63 |     logger.info(`Found ${results.length} results for ${query}`, results);
64 | 
65 |     return {
66 |       results,
67 |       success: true,
68 |     };
69 |   } catch (err: unknown) {
70 |     const msg = err instanceof Error ? err.message : 'Local search error.';
71 |     // Diagnostics go to stderr: on an MCP stdio transport stdout carries the
72 |     // JSON-RPC stream, and stray text there corrupts the protocol.
73 |     process.stderr.write(`${msg}\n`);
74 |     throw err;
75 |   } finally {
76 |     // Always release the browser, whether the search succeeded or threw.
77 |     await browserSearch.closeBrowser();
78 |   }
79 | }


--------------------------------------------------------------------------------
/src/search/searxng.ts:
--------------------------------------------------------------------------------
 1 | import url from 'node:url';
 2 | import { ISearchRequestOptions, ISearchResponse, ISearchResponseResult } from '../interface.js';
 3 | 
 4 | /**
 5 |  * SearxNG Search API
 6 |  * - https://docs.searxng.org/dev/search_api.html
 7 |  *
 8 |  * Sends the query to `<apiUrl>/search` and maps at most `limit` hits to the
 9 |  * common search-result shape.
10 |  *
11 |  * @param params - Search request. `apiUrl` is required; `apiKey`, when set,
12 |  *   is sent as a Bearer token; `timeout` (default 10000 ms) aborts the whole
13 |  *   request, including reading the response body.
14 |  * @returns `{ results, success: true }` when the response has a `results`
15 |  *   array, otherwise `{ results: [], success: false }`.
16 |  * @throws {Error} When `apiUrl` is missing, the HTTP status is not OK, the
17 |  *   request is aborted or fails, or the body is not valid JSON.
18 |  */
19 | export async function searxngSearch(params: ISearchRequestOptions): Promise<ISearchResponse> {
20 |   // Declared outside the try so `finally` can always disarm the abort timer.
21 |   let timeoutId: ReturnType<typeof setTimeout> | undefined;
22 | 
23 |   try {
24 |     const {
25 |       query,
26 |       page = 1,
27 |       limit = 10,
28 |       categories = 'general',
29 |       engines = 'all',
30 |       safeSearch = 0,
31 |       format = 'json',
32 |       language = 'auto',
33 |       timeRange = '',
34 |       timeout = 10000,
35 |       apiKey,
36 |       apiUrl,
37 |     } = params;
38 | 
39 |     if (!apiUrl) {
40 |       throw new Error('SearxNG API URL is required');
41 |     }
42 | 
43 |     const controller = new AbortController();
44 |     timeoutId = setTimeout(() => controller.abort(), Number(timeout));
45 | 
46 |     const config = {
47 |       q: query,
48 |       pageno: page,
49 |       categories,
50 |       format,
51 |       safesearch: safeSearch,
52 |       language,
53 |       engines,
54 |       // The search tool schema offers 'all' for timeRange; send it as the
55 |       // empty "no restriction" value this function already defaults to.
56 |       // NOTE(review): SearXNG is expected to reject unknown time_range values — confirm.
57 |       time_range: timeRange === 'all' ? '' : timeRange,
58 |     };
59 | 
60 |     // Tolerate a trailing slash on the configured base URL.
61 |     const endpoint = `${apiUrl.replace(/\/+$/, '')}/search`;
62 | 
63 |     const queryParams = url.format({ query: config });
64 | 
65 |     const headers: HeadersInit = {
66 |       'Content-Type': 'application/json',
67 |     };
68 | 
69 |     if (apiKey) {
70 |       headers['Authorization'] = `Bearer ${apiKey}`;
71 |     }
72 | 
73 |     const res = await fetch(`${endpoint}${queryParams}`, {
74 |       method: 'POST',
75 |       headers,
76 |       signal: controller.signal,
77 |     });
78 | 
79 |     // Fail with the HTTP status instead of a JSON syntax error when the
80 |     // server answers with a non-JSON error page.
81 |     if (!res.ok) {
82 |       throw new Error(`SearxNG search error: ${res.status} ${res.statusText}`);
83 |     }
84 | 
85 |     const response = (await res.json()) as { results?: Array<Record<string, any>> };
86 |     if (Array.isArray(response?.results)) {
87 |       const list = response.results.slice(0, limit);
88 |       const results: ISearchResponseResult[] = list.map((item: Record<string, any>) => {
89 |         const image = item.img_src ? {
90 |           thumbnail: item.thumbnail_src,
91 |           src: item.img_src,
92 |         } : null;
93 |         const video = item.iframe_src ? {
94 |           thumbnail: item.thumbnail_src,
95 |           src: item.iframe_src,
96 |         } : null;
97 |         return {
98 |           title: item.title,
99 |           snippet: item.content,
100 |           url: item.url,
101 |           source: item.source,
102 |           image,
103 |           video,
104 |           engine: item.engine,
105 |         };
106 |       });
107 |       return {
108 |         results,
109 |         success: true,
110 |       };
111 |     }
112 |     return {
113 |       results: [],
114 |       success: false,
115 |     };
116 |   } catch (err: unknown) {
117 |     const msg = err instanceof Error ? err.message : 'Searxng search error.';
118 |     // Diagnostics go to stderr: on an MCP stdio transport stdout carries the
119 |     // JSON-RPC stream, and stray text there corrupts the protocol.
120 |     process.stderr.write(`${msg}\n`);
121 |     throw err;
122 |   } finally {
123 |     // Runs on success and failure alike, so the timer never outlives the call.
124 |     if (timeoutId !== undefined) {
125 |       clearTimeout(timeoutId);
126 |     }
127 |   }
128 | }
99 | 


--------------------------------------------------------------------------------
/src/search/tavily.ts:
--------------------------------------------------------------------------------
 1 | import { tavily, TavilySearchOptions } from '@tavily/core';
 2 | import { ISearchRequestOptions, ISearchResponse } from '../interface.js';
 3 | 
 4 | /**
 5 |  * Tavily Search API
 6 |  * - https://docs.tavily.com/documentation/quickstart
 7 |  *
 8 |  * @param options - Search request. `apiKey` is required; `limit` (default 10)
 9 |  *   is sent as `maxResults`, `categories` (default 'general') as `topic`, and
10 |  *   `timeRange` as `timeRange` unless it is 'all'.
11 |  * @returns `{ results, success: true }` with title, url, snippet and engine
12 |  *   for each hit.
13 |  * @throws {Error} When `apiKey` is missing or the Tavily client call fails
14 |  *   (the original error is rethrown).
15 |  */
16 | export async function tavilySearch(options: ISearchRequestOptions): Promise<ISearchResponse> {
17 |   const {
18 |     query,
19 |     limit = 10,
20 |     categories = 'general',
21 |     timeRange,
22 |     apiKey,
23 |   } = options;
24 | 
25 |   if (!apiKey) {
26 |     throw new Error('Tavily API key is required');
27 |   }
28 | 
29 |   try {
30 |     const tvly = tavily({
31 |       apiKey,
32 |     });
33 | 
34 |     // The search tool schema offers 'all' for timeRange; treat it as "no
35 |     // restriction" by leaving the option unset, exactly as when it is omitted.
36 |     // NOTE(review): Tavily is expected to accept only day/week/month/year — confirm.
37 |     const range = timeRange === 'all' ? undefined : timeRange;
38 | 
39 |     const params: TavilySearchOptions = {
40 |       topic: categories as TavilySearchOptions['topic'],
41 |       timeRange: range as TavilySearchOptions['timeRange'],
42 |       maxResults: limit,
43 |     };
44 | 
45 |     const res = await tvly.search(query, params);
46 |     const results = res.results.map(item => ({
47 |       title: item.title,
48 |       url: item.url,
49 |       snippet: item.content,
50 |       engine: 'tavily',
51 |     }));
52 | 
53 |     return {
54 |       results,
55 |       success: true,
56 |     };
57 |   } catch (error) {
58 |     const msg = error instanceof Error ? error.message : 'Tavily search error.';
59 |     // Diagnostics go to stderr: on an MCP stdio transport stdout carries the
60 |     // JSON-RPC stream, and stray text there corrupts the protocol.
61 |     process.stderr.write(`${msg}\n`);
62 |     throw error;
63 |   }
64 | }


--------------------------------------------------------------------------------
/src/tools.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * The following tools are based on the Firecrawl MCP Server
  3 |  * https://github.com/mendableai/firecrawl-mcp-server
  4 |  */
  5 | 
  6 | import { Tool } from '@modelcontextprotocol/sdk/types.js';
  7 | 
  8 | // tools definition
  9 | export const SEARCH_TOOL: Tool = { // MCP tool descriptor for the web search tool
 10 |   name: 'one_search',
 11 |   description:
 12 |     'Search and retrieve content from web pages. ' +
 13 |     'Returns SERP results by default (url, title, description).',
 14 |   inputSchema: { // argument schema; defaults are stated in descriptions only, not as `default` keywords
 15 |     type: 'object',
 16 |     properties: {
 17 |       query: {
 18 |         type: 'string',
 19 |         description: 'Search query string',
 20 |       },
 21 |       limit: {
 22 |         type: 'number',
 23 |         description: 'Maximum number of results to return (default: 10)',
 24 |       },
 25 |       language: {
 26 |         type: 'string',
 27 |         description: 'Language code for search results (default: auto)',
 28 |       },
 29 |       categories: {
 30 |         type: 'string',
 31 |         enum: [
 32 |           'general',
 33 |           'news',
 34 |           'images',
 35 |           'videos',
 36 |           'it',
 37 |           'science',
 38 |           'map',
 39 |           'music',
 40 |           'files',
 41 |           'social_media',
 42 |         ],
 43 |         description: 'Categories to search for (default: general)',
 44 |       },
 45 |       timeRange: {
 46 |         type: 'string',
 47 |         description: 'Time range for search results (default: all)',
 48 |         enum: [
 49 |           'all',
 50 |           'day',
 51 |           'week',
 52 |           'month',
 53 |           'year',
 54 |         ],
 55 |       },
 56 |     },
 57 |     required: ['query'], // every other property is optional
 58 |   },
 59 | };
 60 | 
 61 | export const MAP_TOOL: Tool = { // MCP tool descriptor for the URL discovery tool
 62 |   name: 'one_map',
 63 |   description:
 64 |     'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
 65 |   inputSchema: { // argument schema for a tool call
 66 |     type: 'object',
 67 |     properties: {
 68 |       url: {
 69 |         type: 'string',
 70 |         description: 'Starting URL for URL discovery',
 71 |       },
 72 |       search: {
 73 |         type: 'string',
 74 |         description: 'Optional search term to filter URLs',
 75 |       },
 76 |       ignoreSitemap: { // NOTE(review): the schema does not forbid combining this with sitemapOnly — confirm handler behavior
 77 |         type: 'boolean',
 78 |         description: 'Skip sitemap.xml discovery and only use HTML links',
 79 |       },
 80 |       sitemapOnly: {
 81 |         type: 'boolean',
 82 |         description: 'Only use sitemap.xml for discovery, ignore HTML links',
 83 |       },
 84 |       includeSubdomains: {
 85 |         type: 'boolean',
 86 |         description: 'Include URLs from subdomains in results',
 87 |       },
 88 |       limit: {
 89 |         type: 'number',
 90 |         description: 'Maximum number of URLs to return',
 91 |       },
 92 |     },
 93 |     required: ['url'], // every other property is optional
 94 |   },
 95 | };
 96 | 
 97 | export const SCRAPE_TOOL: Tool = {
 98 |   name: 'one_scrape',
 99 |   description:
100 |     'Scrape a single webpage with advanced options for content extraction. ' +
101 |     'Supports various formats including markdown, HTML, and screenshots. ' +
102 |     'Can execute custom actions like clicking or scrolling before scraping.',
103 |   inputSchema: {
104 |     type: 'object',
105 |     properties: {
106 |       url: {
107 |         type: 'string',
108 |         description: 'The URL to scrape',
109 |       },
110 |       formats: {
111 |         type: 'array',
112 |         items: {
113 |           type: 'string',
114 |           enum: [
115 |             'markdown',
116 |             'html',
117 |             'rawHtml',
118 |             'screenshot',
119 |             'links',
120 |             'screenshot@fullPage',
121 |             'extract',
122 |           ],
123 |         },
124 |         description: "Content formats to extract (default: ['markdown'])",
125 |       },
126 |       onlyMainContent: {
127 |         type: 'boolean',
128 |         description:
129 |           'Extract only the main content, filtering out navigation, footers, etc.',
130 |       },
131 |       includeTags: {
132 |         type: 'array',
133 |         items: { type: 'string' },
134 |         description: 'HTML tags to specifically include in extraction',
135 |       },
136 |       excludeTags: {
137 |         type: 'array',
138 |         items: { type: 'string' },
139 |         description: 'HTML tags to exclude from extraction',
140 |       },
141 |       waitFor: {
142 |         type: 'number',
143 |         description: 'Time in milliseconds to wait for dynamic content to load',
144 |       },
145 |       timeout: {
146 |         type: 'number',
147 |         description:
148 |           'Maximum time in milliseconds to wait for the page to load',
149 |       },
150 |       actions: {
151 |         type: 'array',
152 |         items: {
153 |           type: 'object',
154 |           properties: {
155 |             type: {
156 |               type: 'string',
157 |               enum: [
158 |                 'wait',
159 |                 'click',
160 |                 'screenshot',
161 |                 'write',
162 |                 'press',
163 |                 'scroll',
164 |                 'scrape',
165 |                 'executeJavascript',
166 |               ],
167 |               description: 'Type of action to perform',
168 |             },
169 |             selector: {
170 |               type: 'string',
171 |               description: 'CSS selector for the target element',
172 |             },
173 |             milliseconds: {
174 |               type: 'number',
175 |               description: 'Time to wait in milliseconds (for wait action)',
176 |             },
177 |             text: {
178 |               type: 'string',
179 |               description: 'Text to write (for write action)',
180 |             },
181 |             key: {
182 |               type: 'string',
183 |               description: 'Key to press (for press action)',
184 |             },
185 |             direction: {
186 |               type: 'string',
187 |               enum: ['up', 'down'],
188 |               description: 'Scroll direction',
189 |             },
190 |             script: {
191 |               type: 'string',
192 |               description: 'JavaScript code to execute',
193 |             },
194 |             fullPage: {
195 |               type: 'boolean',
196 |               description: 'Take full page screenshot',
197 |             },
198 |           },
199 |           required: ['type'],
200 |         },
201 |         description: 'List of actions to perform before scraping',
202 |       },
203 |       extract: {
204 |         type: 'object',
205 |         properties: {
206 |           schema: {
207 |             type: 'object',
208 |             description: 'Schema for structured data extraction',
209 |           },
210 |           systemPrompt: {
211 |             type: 'string',
212 |             description: 'System prompt for LLM extraction',
213 |           },
214 |           prompt: {
215 |             type: 'string',
216 |             description: 'User prompt for LLM extraction',
217 |           },
218 |         },
219 |         description: 'Configuration for structured data extraction',
220 |       },
221 |       mobile: {
222 |         type: 'boolean',
223 |         description: 'Use mobile viewport',
224 |       },
225 |       skipTlsVerification: {
226 |         type: 'boolean',
227 |         description: 'Skip TLS certificate verification',
228 |       },
229 |       removeBase64Images: {
230 |         type: 'boolean',
231 |         description: 'Remove base64 encoded images from output',
232 |       },
233 |       location: {
234 |         type: 'object',
235 |         properties: {
236 |           country: {
237 |             type: 'string',
238 |             description: 'Country code for geolocation',
239 |           },
240 |           languages: {
241 |             type: 'array',
242 |             items: { type: 'string' },
243 |             description: 'Language codes for content',
244 |           },
245 |         },
246 |         description: 'Location settings for scraping',
247 |       },
248 |     },
249 |     required: ['url'],
250 |   },
251 | };
252 | 
253 | 
254 | 
255 | export const EXTRACT_TOOL: Tool = { // Declarative definition of the 'one_extract' tool: its name, description, and input schema.
256 |   name: 'one_extract',
257 |   description: // assembled from the two string literals below, joined with +
258 |     'Extract structured information from web pages using LLM. ' +
259 |     'Supports both cloud AI and self-hosted LLM extraction.',
260 |   inputSchema: { // JSON-Schema-style description of the arguments object this tool accepts
261 |     type: 'object',
262 |     properties: {
263 |       urls: { // array of strings; the only property listed in `required` at the end of this schema
264 |         type: 'array',
265 |         items: { type: 'string' },
266 |         description: 'List of URLs to extract information from',
267 |       },
268 |       prompt: {
269 |         type: 'string',
270 |         description: 'Prompt for the LLM extraction',
271 |       },
272 |       systemPrompt: {
273 |         type: 'string',
274 |         description: 'System prompt for LLM extraction',
275 |       },
276 |       schema: { // typed only as 'object'; no nested properties are declared for it here
277 |         type: 'object',
278 |         description: 'JSON schema for structured data extraction',
279 |       },
280 |       allowExternalLinks: { // NOTE(review): how the three boolean flags below are consumed is not visible in this block — confirm against the handler
281 |         type: 'boolean',
282 |         description: 'Allow extraction from external links',
283 |       },
284 |       enableWebSearch: {
285 |         type: 'boolean',
286 |         description: 'Enable web search for additional context',
287 |       },
288 |       includeSubdomains: {
289 |         type: 'boolean',
290 |         description: 'Include subdomains in extraction',
291 |       },
292 |     },
293 |     required: ['urls'], // every other property declared above is optional
294 |   },
295 | };
296 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     /* Basic options */
 4 |     "target": "es2022",
 5 |     "lib": ["dom", "es6", "dom.iterable", "scripthost"], /* NOTE(review): lib lists es6 while target is es2022 — confirm that newer built-in typings are intentionally left out */
 6 |     "module": "NodeNext",
 7 |     "moduleResolution": "NodeNext",
 8 |     "rootDir": "./src",
 9 |     "resolveJsonModule": true,
10 |     
11 |     /* JavaScript support */
12 |     "allowJs": true,
13 |     
14 |     /* Output options */
15 |     "sourceMap": true,
16 |     "outDir": "./dist",
17 |     
18 |     /* Interop constraints */
19 |     "esModuleInterop": true,
20 |     "forceConsistentCasingInFileNames": true,
21 |     
22 |     /* Type checking */
23 |     "strict": true,
24 |     "noImplicitAny": true, /* NOTE(review): presumably already implied by "strict" — verify against the TSConfig reference */
25 |     "noUnusedLocals": true,
26 |     "noUnusedParameters": true,
27 |     "noImplicitReturns": true,
28 |     "skipLibCheck": true,
29 |     "strictPropertyInitialization": false, /* explicitly switched off even though "strict" is true */
30 |     "strictNullChecks": true, /* NOTE(review): presumably already implied by "strict" — verify against the TSConfig reference */
31 |     "stripInternal": true
32 |   },
33 |   "include": [
34 |     "src/**/*"
35 |   ],
36 |   "exclude": [
37 |     "node_modules",
38 |     "dist",
39 |     "deploy",
40 |     "test",
41 |     "build"
42 |   ]
43 | }
44 | 


--------------------------------------------------------------------------------