├── .dockerignore ├── .env.docker_example ├── .env.example ├── .gitignore ├── Dockerfile ├── LICENSE ├── README_zh.md ├── docker-compose-dev.yaml ├── docker-compose.yaml ├── main.py ├── readme.md ├── requirements.txt ├── run-services.sh └── searxng ├── limiter.toml ├── settings.yml ├── settings.yml.new └── uwsgi.ini /.dockerignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.pyd 4 | __pycache__ 5 | .env 6 | venv/ -------------------------------------------------------------------------------- /.env.docker_example: -------------------------------------------------------------------------------- 1 | SEARXNG_URL=http://searxng:8080 2 | BROWSERLESS_URL=http://browserless:3000 3 | TOKEN=your_browserless_token_here # Replace with your actual token 4 | # PROXY_PROTOCOL=http 5 | # PROXY_URL=your_proxy_url 6 | # PROXY_USERNAME=your_proxy_username 7 | # PROXY_PASSWORD=your_proxy_password 8 | # PROXY_PORT=your_proxy_port 9 | REQUEST_TIMEOUT=30 10 | 11 | 12 | # AI Integration for search result filter 13 | FILTER_SEARCH_RESULT_BY_AI=true 14 | AI_ENGINE=groq 15 | # GROQ 16 | GROQ_API_KEY=your_groq_api_key_here 17 | GROQ_MODEL=llama3-8b-8192 18 | # OPENAI 19 | # OPENAI_API_KEY=your_openai_api_key_here 20 | # OPENAI_MODEL=gpt-3.5-turbo-0125 -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | SEARXNG_URL= 2 | BROWSERLESS_URL= 3 | TOKEN= 4 | # PROXY_PROTOCOL=http 5 | # PROXY_URL=your_proxy_url 6 | # PROXY_USERNAME=your_proxy_username 7 | # PROXY_PASSWORD=your_proxy_password 8 | # PROXY_PORT=your_proxy_port 9 | REQUEST_TIMEOUT=300 10 | 11 | 12 | # AI Integration for search result filter 13 | FILTER_SEARCH_RESULT_BY_AI=true 14 | AI_ENGINE=groq 15 | # GROQ 16 | GROQ_API_KEY=your_groq_api_key_here 17 | GROQ_MODEL=llama3-8b-8192 18 | # OPENAI 19 | # 
OPENAI_API_KEY=your_openai_api_key_here 20 | # OPENAI_MODEL=gpt-3.5-turbo-0125 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | __pycache__ 6 | .env 7 | venvs/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.10-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the current directory contents into the container at /app 8 | COPY . /app 9 | # If .env is not available, fall back to copying .env.docker_example to .env 10 | RUN test -f .env || cp .env.docker_example .env 11 | 12 | 13 | # Install any needed packages specified in requirements.txt 14 | RUN pip install --no-cache-dir -r requirements.txt 15 | RUN pip install watchgod 16 | 17 | # Make port 8000 available to the world outside this container 18 | EXPOSE 8000 19 | 20 | # Run the FastAPI app with uvicorn when the container launches 21 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Essa Mamdani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # 使用 FastAPI、SearXNG、Browserless 和 AI 集成生成 Markdown 输出的 Jina.ai 替代搜索结果抓取器 2 | 3 | 中文 | [English](README.md) 4 | 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) 6 | 7 | ## 描述 8 | 9 | 本项目提供了一个强大的网页抓取工具,它使用 FastAPI、SearXNG 和 Browserless 抓取搜索结果并将其转换为 Markdown 格式。该工具包括使用代理进行网页抓取的功能,并能高效地将 HTML 内容转换为 Markdown。现在还支持使用 AI 进行搜索结果过滤。替代方案包括 Jina.ai、FireCrawl AI、Exa AI 和 2markdown,为开发人员提供各种网页抓取和搜索引擎解决方案。 10 | 11 | ## 目录 12 | - [使用 FastAPI、SearXNG、Browserless 和 AI 集成生成 Markdown 输出的 Jina.ai 替代搜索结果抓取器](#使用-fastapisearxngbrowserless-和-ai-集成生成-markdown-输出的-jinaai-替代搜索结果抓取器) 13 | - [描述](#描述) 14 | - [目录](#目录) 15 | - [替代方案:](#替代方案) 16 | - [功能](#功能) 17 | - [先决条件](#先决条件) 18 | - [Docker 设置](#docker-设置) 19 | - [手动设置](#手动设置) 20 | - [使用方法](#使用方法) 21 | - [搜索端点](#搜索端点) 22 | - [获取 URL 内容](#获取-url-内容) 23 | - [获取图片](#获取图片) 24 | - [获取视频](#获取视频) 25 | - [使用代理](#使用代理) 26 | - [路线图](#路线图) 27 | - [代码说明](#代码说明) 28 | - [许可证](#许可证) 29 | - [作者](#作者) 30 | - [贡献](#贡献) 31 | - [致谢](#致谢) 32 | - [Star 历史](#star-历史) 33 | 34 | ## 替代方案: 35 | 36 | - [Jina.ai](https://jina.ai/): 为开发人员提供的强大搜索引擎。 37 | - [FireCrawl AI](https://firecrawl.dev/): 为开发人员提供的网页抓取 API。 38 | - [Exa AI](https://exa.ai/): 为开发人员提供的网页抓取 API。 39 | - 
[2markdown](https://2markdown.com/): 将 HTML 转换为 Markdown 的网页抓取工具。 40 | 41 | ## 功能 42 | 43 | - **FastAPI**: 用于构建 Python API 的现代、快速 Web 框架。 44 | - **SearXNG**: 开源的互联网元搜索引擎。 45 | - **Browserless**: 一个 Web 浏览器自动化服务。 46 | - **Markdown 输出**: 将 HTML 内容转换为 Markdown 格式。 47 | - **代理支持**: 使用代理进行安全和匿名抓取。 48 | - **AI 集成(Reranker AI)**: 使用 AI 过滤搜索结果以提供最相关的内容。 49 | - **YouTube 转录**: 获取 YouTube 视频转录。 50 | - **图片和视频搜索**: 使用 SearXNG 获取图片和视频结果。 51 | 52 | ## 先决条件 53 | 54 | 确保已安装以下内容: 55 | 56 | - Python 3.11 57 | - Virtualenv 58 | - Docker 59 | 60 | ## Docker 设置 61 | 62 | 您可以使用 Docker 简化设置过程。请按照以下步骤操作: 63 | 64 | 1. **克隆仓库**: 65 | ```sh 66 | git clone https://github.com/essamamdani/search-result-scraper-markdown.git 67 | cd search-result-scraper-markdown 68 | ``` 69 | 70 | 2. **运行 Docker Compose**: 71 | ```sh 72 | docker compose up --build 73 | ``` 74 | 75 | 通过此设置,如果更改 `.env` 或 `main.py` 文件,则不再需要重启 Docker。更改将自动重新加载。 76 | 77 | ## 手动设置 78 | 79 | 按照以下步骤进行手动设置: 80 | 81 | 1. **克隆仓库**: 82 | ```sh 83 | git clone https://github.com/essamamdani/search-result-scraper-markdown.git 84 | cd search-result-scraper-markdown 85 | ``` 86 | 87 | 2. **创建并激活虚拟环境**: 88 | ```sh 89 | virtualenv venv 90 | source venv/bin/activate 91 | ``` 92 | 93 | 3. **安装依赖项**: 94 | ```sh 95 | pip install -r requirements.txt 96 | ``` 97 | 98 | 4. **在根目录中创建一个 .env 文件**,内容如下: 99 | ```bash 100 | SEARXNG_URL=http://searxng:8080 101 | BROWSERLESS_URL=http://browserless:3000 102 | TOKEN=your_browserless_token_here # 用您的实际令牌替换 103 | # PROXY_PROTOCOL=http 104 | # PROXY_URL=your_proxy_url 105 | # PROXY_USERNAME=your_proxy_username 106 | # PROXY_PASSWORD=your_proxy_password 107 | # PROXY_PORT=your_proxy_port 108 | REQUEST_TIMEOUT=30 109 | 110 | # 用于搜索结果过滤的 AI 集成 111 | FILTER_SEARCH_RESULT_BY_AI=true 112 | AI_ENGINE=groq 113 | # GROQ 114 | GROQ_API_KEY=yours_groq_api_key_here 115 | GROQ_MODEL=llama3-8b-8192 116 | # OPENAI 117 | # OPENAI_API_KEY=your_openai_api_key_here 118 | # OPENAI_MODEL=gpt-3.5-turbo-0125 119 | ``` 120 | 121 | 5. 
**运行 SearXNG 和 Browserless 的 Docker 容器**: 122 | ```sh 123 | ./run-services.sh 124 | ``` 125 | 126 | 6. **启动 FastAPI 应用程序**: 127 | ```sh 128 | uvicorn main:app --host 0.0.0.0 --port 8000 129 | ``` 130 | 131 | ## 使用方法 132 | 133 | ### 搜索端点 134 | 135 | 要执行搜索查询,请向根端点 `/` 发送带有查询参数 `q`(搜索查询)、`num_results`(结果数量)和 `format`(以 JSON 或默认 Markdown 格式获取响应)的 GET 请求。 136 | 137 | 示例: 138 | ```sh 139 | curl "http://localhost:8000/?q=python&num_results=5&format=json" # 获取 JSON 格式 140 | curl "http://localhost:8000/?q=python&num_results=5" # 默认 Markdown 格式 141 | ``` 142 | 143 | ### 获取 URL 内容 144 | 145 | 要获取并转换特定 URL 的内容为 Markdown,请向 `/r/{url:path}` 端点发送 GET 请求。 146 | 147 | 示例: 148 | ```sh 149 | curl "http://localhost:8000/r/https://example.com&format=json" # 获取 JSON 格式 150 | curl "http://localhost:8000/r/https://example.com" # 默认 Markdown 格式 151 | ``` 152 | 153 | ### 获取图片 154 | 155 | 要获取图片搜索结果,请向 `/images` 端点发送带有查询参数 `q`(搜索查询)和 `num_results`(结果数量)的 GET 请求。 156 | 157 | 示例: 158 | ```sh 159 | curl "http://localhost:8000/images?q=puppies&num_results=5" 160 | ``` 161 | 162 | ### 获取视频 163 | 164 | 要获取视频搜索结果,请向 `/videos` 端点发送带有查询参数 `q`(搜索查询)和 `num_results`(结果数量)的 GET 请求。 165 | 166 | 示例: 167 | ```sh 168 | curl "http://localhost:8000/videos?q=cooking+recipes&num_results=5" 169 | ``` 170 | 171 | ## 使用代理 172 | 173 | 本项目使用 Geonode 代理进行网页抓取。您可以使用 [我的 Geonode 推荐链接](https://geonode.com/invite/47389) 开始使用他们的代理服务。 174 | 175 | ## 路线图 176 | 177 | - [x] **FastAPI**: 用于构建 Python API 的现代、快速 Web 框架。 178 | - [x] **SearXNG**: 开源的互联网元搜索引擎。 179 | - [x] **Browserless**: 一个 Web 浏览器自动化服务。 180 | - [x] **Markdown 输出**: 将 HTML 内容转换为 Markdown 格式。 181 | - [x] **代理支持**: 使用代理进行安全和匿名抓取。 182 | - [x] **AI 集成(Reranker AI)**: 使用 AI 过滤搜索结果以提供最相关的内容。 183 | - [x] **YouTube 转录**: 获取 YouTube 视频转录。 184 | - [x] **图片和视频搜索**: 使用 SearXNG 获取图片和视频结果。 185 | 186 | ## 代码说明 187 | 188 | 有关代码的详细说明,请访问 [这里](https://www.essamamdani.com/articles/search-result-scraper-markdown) 的文章。 189 | 190 | ## 许可证 191 | 192 | 本项目根据 MIT 许可证授权。有关详细信息,请参阅 
[LICENSE](LICENSE) 文件。 193 | 194 | ## 作者 195 | 196 | Essa Mamdani - [essamamdani.com](https://essamamdani.com) 197 | 198 | ## 贡献 199 | 200 | 欢迎贡献!请随时提交 Pull Request。 201 | 202 | ## 致谢 203 | 204 | - [FastAPI](https://fastapi.tiangolo.com/) 205 | - [SearXNG](https://github.com/searxng/searxng) 206 | - [Browserless](https://www.browserless.io/) 207 | 208 | ## Star 历史 209 | 210 | [![Star History Chart](https://api.star-history.com/svg?repos=essamamdani/search-result-scraper-markdown&type=Date)](https://star-history.com/#essamamdani/search-result-scraper-markdown&Date) -------------------------------------------------------------------------------- /docker-compose-dev.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | search-webscraper-markdown: 3 | build: 4 | context: . 5 | dockerfile: Dockerfile # This refers to your Python FastAPI Dockerfile 6 | container_name: search-webscraper-markdown 7 | ports: 8 | - "8000:8000" 9 | depends_on: 10 | - searxng 11 | - browserless 12 | volumes: 13 | - .:/app 14 | 15 | searxng: 16 | container_name: searxng 17 | image: docker.io/searxng/searxng:latest 18 | restart: unless-stopped 19 | networks: 20 | - default 21 | ports: 22 | - "8080:8080" 23 | volumes: 24 | - ./searxng:/etc/searxng:rw 25 | 26 | browserless: 27 | container_name: browserless 28 | image: ghcr.io/browserless/chromium 29 | restart: unless-stopped 30 | networks: 31 | - default 32 | ports: 33 | - "3000:3000" 34 | environment: 35 | - TOKEN=your_browserless_token_here # Replace with your actual token 36 | - MAX_CONCURRENT_SESSIONS=10 37 | - TIMEOUT=60000 38 | - QUEUED=10 39 | 40 | networks: 41 | default: 42 | driver: bridge 43 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | search-webscraper-markdown: 3 | build: 4 | context: . 
5 | dockerfile: Dockerfile # This refers to your Python FastAPI Dockerfile 6 | container_name: search-webscraper-markdown 7 | ports: 8 | - "8000:8000" 9 | depends_on: 10 | - searxng 11 | - browserless 12 | volumes: 13 | - .:/app 14 | 15 | searxng: 16 | container_name: searxng 17 | image: docker.io/searxng/searxng:latest 18 | restart: unless-stopped 19 | networks: 20 | - default 21 | ports: 22 | - "8080:8080" 23 | volumes: 24 | - ./searxng:/etc/searxng:rw 25 | 26 | browserless: 27 | container_name: browserless 28 | image: ghcr.io/browserless/chromium 29 | restart: unless-stopped 30 | networks: 31 | - default 32 | ports: 33 | - "3000:3000" 34 | environment: 35 | - TOKEN=your_browserless_token_here # Replace with your actual token 36 | - MAX_CONCURRENT_SESSIONS=10 37 | - TIMEOUT=60000 38 | - QUEUED=10 39 | 40 | networks: 41 | default: 42 | driver: bridge 43 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict 3 | 4 | from pydantic import BaseModel 5 | 6 | from dotenv import load_dotenv 7 | import httpx 8 | from fastapi import FastAPI, Query, Request 9 | from fastapi.responses import JSONResponse, PlainTextResponse 10 | 11 | from bs4 import BeautifulSoup, Comment 12 | import json 13 | import html2text 14 | from youtube_transcript_api import YouTubeTranscriptApi 15 | import re 16 | # Load .env file 17 | load_dotenv() 18 | 19 | # Retrieve environment variables 20 | SEARXNG_URL = os.getenv('SEARXNG_URL') 21 | BROWSERLESS_URL = os.getenv('BROWSERLESS_URL') 22 | TOKEN = os.getenv('TOKEN') 23 | PROXY_PROTOCOL = os.getenv('PROXY_PROTOCOL', 'http') 24 | PROXY_URL = os.getenv('PROXY_URL') 25 | PROXY_USERNAME = os.getenv('PROXY_USERNAME') 26 | PROXY_PASSWORD = os.getenv('PROXY_PASSWORD') 27 | PROXY_PORT = os.getenv('PROXY_PORT') 28 | 29 | REQUEST_TIMEOUT = int(os.getenv('REQUEST_TIMEOUT', '30')) 30 | 31 | 32 | 33 
| # AI Integration 34 | FILTER_SEARCH_RESULT_BY_AI = os.getenv('FILTER_SEARCH_RESULT_BY_AI', 'false').lower() == 'true' 35 | AI_ENGINE = os.getenv('AI_ENGINE', 'openai') 36 | 37 | # Domains that should only be accessed using Browserless 38 | domains_only_for_browserless = ["twitter", "x", "facebook", "ucarspro"] 39 | 40 | # Create FastAPI app 41 | app = FastAPI() 42 | 43 | HEADERS = { 44 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' 45 | } 46 | 47 | def get_proxies(without=False): 48 | if PROXY_URL and PROXY_USERNAME and PROXY_PORT: 49 | if without: 50 | return { 51 | "http": f"{PROXY_PROTOCOL}://{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_URL}:{PROXY_PORT}", 52 | "https": f"{PROXY_PROTOCOL}://{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_URL}:{PROXY_PORT}" 53 | } 54 | return { 55 | "http://": f"{PROXY_PROTOCOL}://{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_URL}:{PROXY_PORT}", 56 | "https://": f"{PROXY_PROTOCOL}://{PROXY_USERNAME}:{PROXY_PASSWORD}@{PROXY_URL}:{PROXY_PORT}" 57 | } 58 | return None 59 | 60 | def fetch_content(url): 61 | proxies = get_proxies() 62 | def fetch_normal_content(url): 63 | try: 64 | response = httpx.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, proxies=proxies, follow_redirects=True) 65 | response.raise_for_status() 66 | return response.text 67 | except httpx.RequestError as e: 68 | print(f"An error occurred while requesting {url}: {e}") 69 | except httpx.HTTPStatusError as e: 70 | print(f"HTTP error occurred: {e}") 71 | return None 72 | 73 | def fetch_browserless_content(url): 74 | try: 75 | browserless_url = f"{BROWSERLESS_URL}/content" 76 | params = { 77 | "headless": False, 78 | "stealth": True, 79 | } 80 | if TOKEN: 81 | params['token'] = TOKEN 82 | 83 | proxy_url = f"{PROXY_PROTOCOL}://{PROXY_URL}:{PROXY_PORT}" if PROXY_URL and PROXY_PORT else None 84 | if proxy_url: 85 | params['--proxy-server'] = proxy_url 86 | 87 | browserless_data = { 88 | "url": 
url, 89 | "rejectResourceTypes": ["image", "stylesheet"], 90 | "gotoOptions": {"waitUntil": "networkidle0", "timeout": REQUEST_TIMEOUT * 1000}, 91 | "bestAttempt": True, 92 | "setJavaScriptEnabled": True, 93 | } 94 | if PROXY_USERNAME and PROXY_PASSWORD: 95 | browserless_data["authenticate"] = { 96 | "username": PROXY_USERNAME, 97 | "password": PROXY_PASSWORD 98 | } 99 | 100 | headers = { 101 | 'Cache-Control': 'no-cache', 102 | 'Content-Type': 'application/json' 103 | } 104 | 105 | response = httpx.post(browserless_url, params=params, headers=headers, data=json.dumps(browserless_data), timeout=REQUEST_TIMEOUT * 2) 106 | response.raise_for_status() 107 | return response.text 108 | except httpx.RequestError as e: 109 | print(f"An error occurred while requesting Browserless for {url}: {e}") 110 | except httpx.HTTPStatusError as e: 111 | print(f"HTTP error occurred with Browserless: {e}") 112 | return None 113 | 114 | if any(domain in url for domain in domains_only_for_browserless): 115 | content = fetch_browserless_content(url) 116 | else: 117 | content = fetch_normal_content(url) 118 | if content is None: 119 | content = fetch_browserless_content(url) 120 | 121 | return content 122 | 123 | def get_transcript(video_id: str, format: str = "markdown"): 124 | try: 125 | transcript_list = YouTubeTranscriptApi.get_transcript(video_id, proxies=get_proxies(without=True)) 126 | transcript = " ".join([entry['text'] for entry in transcript_list]) 127 | 128 | # Fetch the title from the video page 129 | video_url = f"https://www.youtube.com/watch?v={video_id}" 130 | video_page = fetch_content(video_url) 131 | title = extract_title(video_page) 132 | 133 | if format == "json": 134 | return JSONResponse({"url": video_url, "title": title, "transcript": transcript}) 135 | return PlainTextResponse(f"Title: {title}\n\nURL Source: {video_url}\n\nTranscript:\n{transcript}") 136 | except Exception as e: 137 | return PlainTextResponse(f"Failed to retrieve transcript: {str(e)}") 138 | 139 | 
def extract_title(html_content): 140 | if html_content: 141 | soup = BeautifulSoup(html_content, 'html.parser') 142 | title = soup.find("title") 143 | return title.string.replace(" - YouTube", "") if title else 'No title' 144 | return 'No title' 145 | 146 | def clean_html(html): 147 | soup = BeautifulSoup(html, 'html.parser') 148 | 149 | # Remove all script, style, and other unnecessary elements 150 | for script_or_style in soup(["script", "style", "header", "footer", "noscript", "form", "input", "textarea", "select", "option", "button", "svg", "iframe", "object", "embed", "applet", "nav", "navbar"]): 151 | script_or_style.decompose() 152 | 153 | # remove ids "layers" 154 | ids = ['layers'] 155 | 156 | for id_ in ids: 157 | tag = soup.find(id=id_) 158 | if tag: 159 | tag.decompose() 160 | 161 | # Remove unwanted classes and ids 162 | for tag in soup.find_all(True): 163 | tag.attrs = {key: value for key, value in tag.attrs.items() if key not in ['class', 'id', 'style']} 164 | 165 | # Remove comments 166 | for comment in soup.find_all(string=lambda text: isinstance(text, Comment)): 167 | comment.extract() 168 | 169 | return str(soup) 170 | 171 | def parse_html_to_markdown(html, url, title=None): 172 | cleaned_html = clean_html(html) 173 | title_ = title or extract_title(html) 174 | 175 | # Convert the extracted HTML to Markdown 176 | text_maker = html2text.HTML2Text() 177 | text_maker.ignore_links = False # Include links 178 | text_maker.ignore_tables = False 179 | text_maker.bypass_tables = False # Format tables in Markdown 180 | text_maker.ignore_images = False # Include images 181 | text_maker.protect_links = True # Protect links from line breaks 182 | text_maker.mark_code = True # Mark code with [code]...[/code] blocks 183 | 184 | # Convert HTML to Markdown 185 | markdown_content = text_maker.handle(cleaned_html) 186 | 187 | return { 188 | "title": title_, 189 | "url": url, 190 | "markdown_content": markdown_content 191 | } 192 | 193 | def rerenker_ai(data: 
Dict[str, List[dict]], max_token: int = 2000) -> List[dict]: 194 | client = None 195 | model = None 196 | class ResultItem(BaseModel): 197 | title: str 198 | url: str 199 | content: str 200 | class SearchResult(BaseModel): 201 | results: List[ResultItem] 202 | system_message = ( 203 | 'You will be given a JSON format of search results and a search query. ' 204 | 'Extract only "exact and most" related search `results` based on the `query`. ' 205 | 'If the "content" field is empty, use the "title" or "url" field to determine relevance. ' 206 | f' Return the results in same JSON format as you would be given, the JSON object must use the schema: {json.dumps(SearchResult.schema())}' 207 | ) 208 | 209 | if AI_ENGINE == "groq": 210 | from groq import Groq 211 | client = Groq() 212 | model = os.getenv('GROQ_MODEL', 'llama3-8b-8192') 213 | 214 | else: 215 | import openai 216 | client = openai 217 | model = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo-0125') 218 | 219 | filtered_results = [] 220 | batch_size = 10 221 | query = data["query"] 222 | results = data["results"] 223 | 224 | for i in range(0, len(results), batch_size): 225 | batch = results[i:i+batch_size] 226 | processed_batch = [ 227 | { 228 | "title": item.get("title", ""), 229 | "url": item.get("url", ""), 230 | "content": item.get("content", "") 231 | } 232 | for item in batch 233 | ] 234 | 235 | response = client.chat.completions.create( 236 | model=model, 237 | stream=False, 238 | messages=[ 239 | { 240 | "role": "system", 241 | "content": system_message 242 | }, 243 | { 244 | "role": "user", 245 | "content": json.dumps({"query": query, "results": processed_batch}) + "\n\nExtract the most relevant search results based on the query and ensure each result contains \"content.\" Return them in JSON format with \"title,\" \"url,\" and \"content\" fields only." 
246 | } 247 | ], 248 | temperature=0.5, 249 | max_tokens=max_token, 250 | response_format={"type":"json_object"} 251 | 252 | ) 253 | print(response.choices[0].message.content) 254 | batch_filtered_results = json.loads(response.choices[0].message.content) 255 | if 'results' in batch_filtered_results: 256 | filtered_results.extend(batch_filtered_results['results']) 257 | else: 258 | print(f"Warning: 'results' key missing in batch response: {batch_filtered_results}") 259 | 260 | return {"results": filtered_results, "query": query} 261 | 262 | def searxng(query: str, categories: str = "general") -> dict: 263 | searxng_url = f"{SEARXNG_URL}/search?q={query}&categories={categories}&format=json" 264 | try: 265 | response = httpx.get(searxng_url, headers=HEADERS, timeout=REQUEST_TIMEOUT) 266 | response.raise_for_status() 267 | except httpx.RequestError as e: 268 | return {"query": query, "results": [{"error": f"Search query failed with error: {e}"}]} 269 | except httpx.HTTPStatusError as e: 270 | return {"query": query, "results": [{"error": f"Search query failed with HTTP error: {e}"}]} 271 | 272 | search_results = response.json() 273 | return search_results 274 | 275 | def search(query: str, num_results: int, json_response: bool = False) -> list: 276 | search_results = searxng(query) 277 | if FILTER_SEARCH_RESULT_BY_AI: 278 | search_results = rerenker_ai(search_results) 279 | 280 | json_return = [] 281 | markdown_return = "" 282 | for result in search_results["results"][:num_results]: 283 | url = result["url"] 284 | title = result["title"] 285 | if "youtube" in url: 286 | video_id = re.search(r"v=([^&]+)", url) 287 | if json_response: 288 | json_return.append(json.loads(get_transcript(video_id.group(1), "json").body))  # unwrap the rendered JSONResponse body 289 | else: 290 | markdown_return += get_transcript(video_id.group(1)).body.decode() + "\n\n ---------------- \n\n"  # PlainTextResponse body is bytes, not str 291 | continue 292 | html_content = fetch_content(url) 293 | if html_content: 294 | markdown_data = parse_html_to_markdown(html_content, url, title=title) 295 | if markdown_data["markdown_content"].strip(): # Check if markdown 
content is not empty 296 | if json_response: 297 | json_return.append(markdown_data) 298 | else: 299 | markdown_return += ( 300 | f"Title: {markdown_data['title']}\n\n" 301 | f"URL Source: {markdown_data['url']}\n\n" 302 | f"Markdown Content:\n{markdown_data['markdown_content']}" 303 | ) + "\n\n ---------------- \n\n" 304 | 305 | 306 | if json_response: 307 | return JSONResponse(json_return) 308 | return PlainTextResponse(markdown_return) 309 | 310 | @app.get("/images") 311 | def get_search_images( 312 | q: str = Query(..., description="Search images"), 313 | num_results: int = Query(5, description="Number of results") 314 | ): 315 | result_list = searxng(q, categories="images") 316 | return JSONResponse(result_list["results"][:num_results]) 317 | 318 | @app.get("/videos") 319 | def get_search_videos( 320 | q: str = Query(..., description="Search videos"), 321 | num_results: int = Query(5, description="Number of results") 322 | ): 323 | result_list = searxng(q, categories="videos") 324 | return JSONResponse(result_list["results"][:num_results]) 325 | 326 | @app.get("/") 327 | def get_search_results( 328 | q: str = Query(..., description="Search query"), 329 | num_results: int = Query(5, description="Number of results"), 330 | format: str = Query("markdown", description="Output format (markdown or json)")): 331 | result_list = search(q, num_results, format == "json") 332 | return result_list 333 | 334 | @app.get("/r/{url:path}") 335 | def fetch_url(request: Request, url: str, format: str = Query("markdown", description="Output format (markdown or json)")): 336 | if "youtube" in url: 337 | return get_transcript(request.query_params.get('v'), format) 338 | 339 | html_content = fetch_content(url) 340 | if html_content: 341 | markdown_data = parse_html_to_markdown(html_content, url) 342 | if format == "json": 343 | return JSONResponse(markdown_data) 344 | 345 | response_text = ( 346 | f"Title: {markdown_data['title']}\n\n" 347 | f"URL Source: {markdown_data['url']}\n\n" 
348 | f"Markdown Content:\n{markdown_data['markdown_content']}" 349 | ) 350 | return PlainTextResponse(response_text) 351 | return PlainTextResponse("Failed to retrieve content") 352 | 353 | # Example usage 354 | if __name__ == "__main__": 355 | import uvicorn 356 | uvicorn.run(app, host="0.0.0.0", port=8000) 357 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Jina.ai Alternative Search Result Scraper with Markdown Output Using FastAPI, SearXNG, Browserless, and AI Integration 2 | 3 | English | [中文版](README_zh.md) 4 | 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) 6 | 7 | ## Description 8 | 9 | This project provides a powerful web scraping tool that fetches search results and converts them into Markdown format using FastAPI, SearXNG, and Browserless. It includes the capability to use proxies for web scraping and handles HTML content conversion to Markdown efficiently. Now featuring AI Integration for filtering search results. Alternatives include Jina.ai, FireCrawl AI, Exa AI, and 2markdown, offering various web scraping and search engine solutions for developers. 
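Each scraped result carries three fields — `title`, `url`, and `markdown_content` — and the Markdown responses join them in a fixed plain-text layout (see `parse_html_to_markdown` and the response formatting in `main.py`). A minimal sketch of that layout; the sample values are invented:

```python
def format_result(result: dict) -> str:
    # Mirrors the plain-text layout the service uses for Markdown responses.
    return (
        f"Title: {result['title']}\n\n"
        f"URL Source: {result['url']}\n\n"
        f"Markdown Content:\n{result['markdown_content']}"
    )

# Invented sample result, for illustration only.
sample = {
    "title": "Example Domain",
    "url": "https://example.com",
    "markdown_content": "# Example Domain\n\nThis domain is for use in examples.",
}
print(format_result(sample))
```

In JSON mode (`format=json`) the same three fields are returned as objects instead of being flattened into one text block.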
10 | 11 | ## Table of Contents 12 | - [Jina.ai Alternative Search Result Scraper with Markdown Output Using FastAPI, SearXNG, Browserless, and AI Integration](#jinaai-alternative-search-result-scraper-with-markdown-output-using-fastapi-searxng-browserless-and-ai-integration) 13 | - [Description](#description) 14 | - [Table of Contents](#table-of-contents) 15 | - [Alternatives:](#alternatives) 16 | - [Features](#features) 17 | - [Prerequisites](#prerequisites) 18 | - [Docker Setup](#docker-setup) 19 | - [Manual Setup](#manual-setup) 20 | - [Usage](#usage) 21 | - [Search Endpoint](#search-endpoint) 22 | - [Fetch URL Content](#fetch-url-content) 23 | - [Fetching Images](#fetching-images) 24 | - [Fetching Videos](#fetching-videos) 25 | - [Using Proxies](#using-proxies) 26 | - [Roadmap](#roadmap) 27 | - [Code Explanation](#code-explanation) 28 | - [License](#license) 29 | - [Author](#author) 30 | - [Contributing](#contributing) 31 | - [Acknowledgements](#acknowledgements) 32 | - [Star History](#star-history) 33 | 34 | ## Alternatives: 35 | 36 | - [Jina.ai](https://jina.ai/): A powerful search engine for developers. 37 | - [FireCrawl AI](https://firecrawl.dev/): A web scraping API for developers. 38 | - [Exa AI](https://exa.ai/): A web scraping API for developers. 39 | - [2markdown](https://2markdown.com/): A web scraping tool that converts HTML to Markdown. 40 | 41 | ## Features 42 | 43 | - **FastAPI**: A modern, fast web framework for building APIs with Python. 44 | - **SearXNG**: An open-source internet metasearch engine. 45 | - **Browserless**: A web browser automation service. 46 | - **Markdown Output**: Converts HTML content to Markdown format. 47 | - **Proxy Support**: Utilizes proxies for secure and anonymous scraping. 48 | - **AI Integration (Reranker AI)**: Filters search results using AI to provide the most relevant content. 49 | - **YouTube Transcriptions**: Fetches YouTube video transcriptions. 
50 | - **Image and Video Search**: Fetches images and video results using SearXNG. 51 | 52 | ## Prerequisites 53 | 54 | Ensure you have the following installed: 55 | 56 | - Python 3.11 57 | - Virtualenv 58 | - Docker 59 | 60 | ## Docker Setup 61 | 62 | You can use Docker to simplify the setup process. Follow these steps: 63 | 64 | 1. **Clone the repository**: 65 | ```sh 66 | git clone https://github.com/essamamdani/search-result-scraper-markdown.git 67 | cd search-result-scraper-markdown 68 | ``` 69 | 70 | 2. **Run Docker Compose**: 71 | ```sh 72 | docker compose up --build 73 | ``` 74 | 75 | With this setup, if you change the `.env` or `main.py` file, you no longer need to restart Docker. Changes will be reloaded automatically. 76 | 77 | ## Manual Setup 78 | 79 | Follow these steps for manual setup: 80 | 81 | 1. **Clone the repository**: 82 | ```sh 83 | git clone https://github.com/essamamdani/search-result-scraper-markdown.git 84 | cd search-result-scraper-markdown 85 | ``` 86 | 87 | 2. **Create and activate virtual environment**: 88 | ```sh 89 | virtualenv venv 90 | source venv/bin/activate 91 | ``` 92 | 93 | 3. **Install dependencies**: 94 | ```sh 95 | pip install -r requirements.txt 96 | ``` 97 | 98 | 4. 
**Create a .env file** in the root directory with the following content: 99 | ```bash 100 | SEARXNG_URL=http://searxng:8080 101 | BROWSERLESS_URL=http://browserless:3000 102 | TOKEN=your_browserless_token_here # Replace with your actual token 103 | # PROXY_PROTOCOL=http 104 | # PROXY_URL=your_proxy_url 105 | # PROXY_USERNAME=your_proxy_username 106 | # PROXY_PASSWORD=your_proxy_password 107 | # PROXY_PORT=your_proxy_port 108 | REQUEST_TIMEOUT=30 109 | 110 | # AI Integration for search result filter 111 | FILTER_SEARCH_RESULT_BY_AI=true 112 | AI_ENGINE=groq 113 | # GROQ 114 | GROQ_API_KEY=your_groq_api_key_here 115 | GROQ_MODEL=llama3-8b-8192 116 | # OPENAI 117 | # OPENAI_API_KEY=your_openai_api_key_here 118 | # OPENAI_MODEL=gpt-3.5-turbo-0125 119 | ``` 120 | 121 | 5. **Run Docker containers for SearXNG and Browserless**: 122 | ```sh 123 | ./run-services.sh 124 | ``` 125 | 126 | 6. **Start the FastAPI application**: 127 | ```sh 128 | uvicorn main:app --host 0.0.0.0 --port 8000 129 | ``` 130 | 131 | ## Usage 132 | 133 | ### Search Endpoint 134 | 135 | To perform a search query, send a GET request to the root endpoint `/` with the query parameters `q` (the search query), `num_results` (number of results), and `format` (`json`, or Markdown by default). 136 | 137 | Example: 138 | ```sh 139 | curl "http://localhost:8000/?q=python&num_results=5&format=json" # for JSON format 140 | curl "http://localhost:8000/?q=python&num_results=5" # by default Markdown 141 | ``` 142 | 143 | ### Fetch URL Content 144 | 145 | To fetch and convert the content of a specific URL to Markdown, send a GET request to the `/r/{url:path}` endpoint. 
146 | 147 | Example: 148 | ```sh 149 | curl "http://localhost:8000/r/https://example.com?format=json" # for JSON format 150 | curl "http://localhost:8000/r/https://example.com" # by default Markdown 151 | ``` 152 | 153 | ### Fetching Images 154 | 155 | To fetch image search results, send a GET request to the `/images` endpoint with the query parameters `q` (search query) and `num_results` (number of results). 156 | 157 | Example: 158 | ```sh 159 | curl "http://localhost:8000/images?q=puppies&num_results=5" 160 | ``` 161 | 162 | ### Fetching Videos 163 | 164 | To fetch video search results, send a GET request to the `/videos` endpoint with the query parameters `q` (search query) and `num_results` (number of results). 165 | 166 | Example: 167 | ```sh 168 | curl "http://localhost:8000/videos?q=cooking+recipes&num_results=5" 169 | ``` 170 | 171 | ## Using Proxies 172 | 173 | This project uses Geonode proxies for web scraping. You can use [my Geonode affiliate link](https://geonode.com/invite/47389) to get started with their proxy services. 174 | 175 | ## Roadmap 176 | 177 | - [x] **FastAPI**: A modern, fast web framework for building APIs with Python. 178 | - [x] **SearXNG**: An open-source internet metasearch engine. 179 | - [x] **Browserless**: A web browser automation service. 180 | - [x] **Markdown Output**: Converts HTML content to Markdown format. 181 | - [x] **Proxy Support**: Utilizes proxies for secure and anonymous scraping. 182 | - [x] **AI Integration (Reranker AI)**: Filters search results using AI to provide the most relevant content. 183 | - [x] **YouTube Transcriptions**: Fetches YouTube video transcriptions. 184 | - [x] **Image and Video Search**: Fetches images and video results using SearXNG. 185 | 186 | ## Code Explanation 187 | 188 | For a detailed explanation of the code, visit the article [here](https://www.essamamdani.com/search-result-scraper-markdown). 189 | 190 | ## License 191 | 192 | This project is licensed under the MIT License. 
See the [LICENSE](LICENSE) file for details. 193 | 194 | ## Author 195 | 196 | Essa Mamdani - [essamamdani.com](https://essamamdani.com) 197 | 198 | ## Contributing 199 | 200 | Contributions are welcome! Please feel free to submit a Pull Request. 201 | 202 | ## Acknowledgements 203 | 204 | - [FastAPI](https://fastapi.tiangolo.com/) 205 | - [SearXNG](https://github.com/searxng/searxng) 206 | - [Browserless](https://www.browserless.io/) 207 | 208 | ## Star History 209 | 210 | [![Star History Chart](https://api.star-history.com/svg?repos=essamamdani/search-result-scraper-markdown&type=Date)](https://star-history.com/#essamamdani/search-result-scraper-markdown&Date) 211 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | python-dotenv 4 | httpx 5 | markdownify 6 | openai 7 | groq 8 | beautifulsoup4 9 | html2text 10 | youtube-transcript-api 11 | # ballyregan 12 | # typer==0.6.1 13 | # rich==12.6.0 14 | # pydantic==1.10.17 -------------------------------------------------------------------------------- /run-services.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run SearXNG 4 | docker run -d --name searxng -p 8888:8888 -v ./searxng:/etc/searxng:rw searxng/searxng 5 | 6 | # Run Browserless 7 | docker run -d --name browserless -p 3000:3000 browserless/chrome 8 | 9 | echo "SearXNG is running at http://localhost:8888" 10 | echo "Browserless is running at http://localhost:3000" 11 | -------------------------------------------------------------------------------- /searxng/limiter.toml: -------------------------------------------------------------------------------- 1 | # This configuration file updates the default configuration file 2 | # See https://github.com/searxng/searxng/blob/master/searx/botdetection/limiter.toml 3 | 4 | [botdetection.ip_limit] 5 | # 
activate the link_token method in the ip_limit method 6 | link_token = false 7 | -------------------------------------------------------------------------------- /searxng/settings.yml: -------------------------------------------------------------------------------- 1 | # see https://docs.searxng.org/admin/settings/settings.html#settings-use-default-settings 2 | use_default_settings: true 3 | 4 | server: 5 | secret_key: "f9e603d4191caab069b021fa0568391a33c8a837b470892c64461b5dd12464f4" 6 | limiter: false 7 | image_proxy: true 8 | port: 8080 9 | bind_address: "0.0.0.0" 10 | 11 | ui: 12 | static_use_hash: true 13 | search: 14 | safe_search: 0 15 | autocomplete: "" 16 | default_lang: "" 17 | formats: 18 | - html 19 | - json 20 | 21 | # communication with search engines 22 | # 23 | outgoing: 24 | # default timeout in seconds, can be overridden by engine 25 | request_timeout: 10.0 26 | # the maximum timeout in seconds 27 | max_request_timeout: 30.0 28 | # suffix of searx_useragent, could contain information like an email address 29 | # to the administrator 30 | useragent_suffix: "" 31 | # The maximum number of concurrent connections that may be established. 32 | pool_connections: 100 33 | # Allow the connection pool to maintain keep-alive connections below this 34 | # point.
35 | pool_maxsize: 20 36 | # See https://www.python-httpx.org/http2/ 37 | enable_http2: true 38 | # uncomment below section if you want to use a proxy, see: SOCKS proxies 39 | # https://2.python-requests.org/en/latest/user/advanced/#proxies 40 | # are also supported: see 41 | # https://2.python-requests.org/en/latest/user/advanced/#socks 42 | # 43 | # proxies: 44 | # all://: 45 | # - http://example.com:9090 46 | 47 | 48 | # using_tor_proxy: false 49 | # 50 | # Extra seconds to add in order to account for the time taken by the proxy 51 | # 52 | # extra_proxy_timeout: 10.0 53 | 54 | 55 | engines: 56 | - name: soundcloud 57 | engine: soundcloud 58 | shortcut: sc 59 | disabled: true 60 | 61 | - name: wikidata 62 | engine: wikidata 63 | timeout: 30 # Increase timeout to 30 seconds 64 | shortcut: wd 65 | categories: general -------------------------------------------------------------------------------- /searxng/settings.yml.new: -------------------------------------------------------------------------------- 1 | general: 2 | # Debug mode, only for development.
Is overwritten by ${SEARXNG_DEBUG} 3 | debug: false 4 | # displayed name 5 | instance_name: "searxng" 6 | # For example: https://example.com/privacy 7 | privacypolicy_url: false 8 | # use true to use your own donation page written in searx/info/en/donate.md 9 | # use false to disable the donation link 10 | donation_url: false 11 | # mailto:contact@example.com 12 | contact_url: false 13 | # record stats 14 | enable_metrics: true 15 | 16 | brand: 17 | new_issue_url: https://github.com/searxng/searxng/issues/new 18 | docs_url: https://docs.searxng.org/ 19 | public_instances: https://searx.space 20 | wiki_url: https://github.com/searxng/searxng/wiki 21 | issue_url: https://github.com/searxng/searxng/issues 22 | # custom: 23 | # maintainer: "Jon Doe" 24 | # # Custom entries in the footer: [title]: [link] 25 | # links: 26 | # Uptime: https://uptime.searxng.org/history/darmarit-org 27 | # About: "https://searxng.org" 28 | 29 | search: 30 | # Filter results. 0: None, 1: Moderate, 2: Strict 31 | safe_search: 0 32 | # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl", 33 | # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off 34 | # by default. 
35 | autocomplete: "" 36 | # minimum characters to type before autocompleter starts 37 | autocomplete_min: 4 38 | # Default search language - leave blank to detect from browser information or 39 | # use codes from 'languages.py' 40 | default_lang: "auto" 41 | # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages 42 | # Available languages 43 | # languages: 44 | # - all 45 | # - en 46 | # - en-US 47 | # - de 48 | # - it-IT 49 | # - fr 50 | # - fr-BE 51 | # ban time in seconds after engine errors 52 | ban_time_on_fail: 5 53 | # max ban time in seconds after engine errors 54 | max_ban_time_on_fail: 120 55 | suspended_times: 56 | # Engine suspension time after error (in seconds; set to 0 to disable) 57 | # For error "Access denied" and "HTTP error [402, 403]" 58 | SearxEngineAccessDenied: 86400 59 | # For error "CAPTCHA" 60 | SearxEngineCaptcha: 86400 61 | # For error "Too many request" and "HTTP error 429" 62 | SearxEngineTooManyRequests: 3600 63 | # Cloudflare CAPTCHA 64 | cf_SearxEngineCaptcha: 1296000 65 | cf_SearxEngineAccessDenied: 86400 66 | # ReCAPTCHA 67 | recaptcha_SearxEngineCaptcha: 604800 68 | 69 | # remove format to deny access, use lower case. 70 | # formats: [html, csv, json, rss] 71 | formats: 72 | - html 73 | 74 | server: 75 | # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} 76 | port: 8888 77 | bind_address: "127.0.0.1" 78 | # public URL of the instance, to ensure correct inbound links. Is overwritten 79 | # by ${SEARXNG_URL}. 80 | base_url: / # "http://example.com/location" 81 | # rate limit the number of requests on the instance, block some bots. 82 | # Is overwritten by ${SEARXNG_LIMITER} 83 | limiter: false 84 | # enable features designed only for public instances. 85 | # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} 86 | public_instance: false 87 | 88 | # If your instance owns a /etc/searxng/settings.yml file, then set the following 89 | # values there.
90 | 91 | secret_key: "22e7603074350bbecdc211f4280e9527428e2bac96599a37fddddca831fd71c1" # Is overwritten by ${SEARXNG_SECRET} 92 | # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} 93 | image_proxy: false 94 | # 1.0 and 1.1 are supported 95 | http_protocol_version: "1.0" 96 | # POST queries are more secure as they don't show up in history but may cause 97 | # problems when using Firefox containers 98 | method: "POST" 99 | default_http_headers: 100 | X-Content-Type-Options: nosniff 101 | X-Download-Options: noopen 102 | X-Robots-Tag: noindex, nofollow 103 | Referrer-Policy: no-referrer 104 | 105 | redis: 106 | # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. 107 | # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis 108 | url: false 109 | 110 | ui: 111 | # Custom static path - leave it blank if you didn't change 112 | static_path: "" 113 | # Is overwritten by ${SEARXNG_STATIC_USE_HASH}. 114 | static_use_hash: false 115 | # Custom templates path - leave it blank if you didn't change 116 | templates_path: "" 117 | # query_in_title: When true, the result page's titles contain the query 118 | # it decreases the privacy, since the browser can record the page titles. 119 | query_in_title: false 120 | # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. 121 | infinite_scroll: false 122 | # ui theme 123 | default_theme: simple 124 | # center the results? 125 | center_alignment: false 126 | # URL prefix of the internet archive, don't forget trailing slash (if needed).
127 | # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" 128 | # Default interface locale - leave blank to detect from browser information or 129 | # use codes from the 'locales' config section 130 | default_locale: "" 131 | # Open result links in a new tab by default 132 | # results_on_new_tab: false 133 | theme_args: 134 | # style of simple theme: auto, light, dark 135 | simple_style: auto 136 | # Perform search immediately if a category is selected. 137 | # Disable to select multiple categories at once and start the search manually. 138 | search_on_category_select: true 139 | # Hotkeys: default or vim 140 | hotkeys: default 141 | 142 | # Lock arbitrary settings on the preferences page. To find the ID of the user 143 | # setting you want to lock, check the ID of the form on the page "preferences". 144 | # 145 | # preferences: 146 | # lock: 147 | # - language 148 | # - autocomplete 149 | # - method 150 | # - query_in_title 151 | 152 | # searx supports result proxification using an external service: 153 | # https://github.com/asciimoo/morty. Uncomment below section if you have a running 154 | # morty proxy; the key is base64 encoded (keep the !!binary notation) 155 | # Note: since commit af77ec3, morty accepts a base64 encoded key.
156 | 157 | # result_proxy: 158 | # url: http://127.0.0.1:3000/ 159 | # # the key is a base64 encoded string, the YAML !!binary prefix is optional 160 | # key: !!binary "your_morty_proxy_key" 161 | # # [true|false] enable the "proxy" button next to each result 162 | # proxify_results: true 163 | 164 | # communication with search engines 165 | # 166 | outgoing: 167 | # default timeout in seconds, can be overridden by engine 168 | request_timeout: 3.0 169 | # the maximum timeout in seconds 170 | # max_request_timeout: 10.0 171 | # suffix of searx_useragent, could contain information like an email address 172 | # to the administrator 173 | useragent_suffix: "" 174 | # The maximum number of concurrent connections that may be established. 175 | pool_connections: 100 176 | # Allow the connection pool to maintain keep-alive connections below this 177 | # point. 178 | pool_maxsize: 20 179 | # See https://www.python-httpx.org/http2/ 180 | enable_http2: true 181 | # uncomment below section if you want to use a custom server certificate 182 | # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults 183 | # and https://www.python-httpx.org/compatibility/#ssl-configuration 184 | # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer 185 | # 186 | # uncomment below section if you want to use a proxy, see: SOCKS proxies 187 | # https://2.python-requests.org/en/latest/user/advanced/#proxies 188 | # are also supported: see 189 | # https://2.python-requests.org/en/latest/user/advanced/#socks 190 | # 191 | # proxies: 192 | # all://: 193 | # - http://proxy1:8080 194 | # - http://proxy2:8080 195 | # 196 | # using_tor_proxy: true 197 | # 198 | # Extra seconds to add in order to account for the time taken by the proxy 199 | # 200 | # extra_proxy_timeout: 10 201 | # 202 | # uncomment below section only if you have more than one network interface 203 | # which can be the source of outgoing search requests 204 | # 205 | # source_ips: 206 | # - 1.1.1.1 207 | # - 1.1.1.2 208 | #
- fe80::/126 209 | 210 | # External plugin configuration, for more details see 211 | # https://docs.searxng.org/dev/plugins.html 212 | # 213 | # plugins: 214 | # - plugin1 215 | # - plugin2 216 | # - ... 217 | 218 | # Comment or un-comment plugin to activate / deactivate by default. 219 | # 220 | # enabled_plugins: 221 | # # these plugins are enabled if nothing is configured .. 222 | # - 'Hash plugin' 223 | # - 'Self Information' 224 | # - 'Tracker URL remover' 225 | # - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy 226 | # # these plugins are disabled if nothing is configured .. 227 | # - 'Hostnames plugin' # see 'hostnames' configuration below 228 | # - 'Basic Calculator' 229 | # - 'Open Access DOI rewrite' 230 | # - 'Tor check plugin' 231 | # # Read the docs before activate: auto-detection of the language could be 232 | # # detrimental to users expectations / users can activate the plugin in the 233 | # # preferences if they want. 234 | # - 'Autodetect search language' 235 | 236 | # Configuration of the "Hostnames plugin": 237 | # 238 | # hostnames: 239 | # replace: 240 | # '(.*\.)?youtube\.com$': 'invidious.example.com' 241 | # '(.*\.)?youtu\.be$': 'invidious.example.com' 242 | # '(.*\.)?reddit\.com$': 'teddit.example.com' 243 | # '(.*\.)?redd\.it$': 'teddit.example.com' 244 | # '(www\.)?twitter\.com$': 'nitter.example.com' 245 | # remove: 246 | # - '(.*\.)?facebook.com$' 247 | # low_priority: 248 | # - '(.*\.)?google(\..*)?$' 249 | # high_priority: 250 | # - '(.*\.)?wikipedia.org$' 251 | # 252 | # Alternatively you can use external files for configuring the "Hostnames plugin": 253 | # 254 | # hostnames: 255 | # replace: 'rewrite-hosts.yml' 256 | # 257 | # Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): 258 | # '(.*\.)?youtube\.com$': 'invidious.example.com' 259 | # '(.*\.)?youtu\.be$': 'invidious.example.com' 260 | # 261 | 262 | checker: 263 | # disable checker when in debug mode 264 | 
off_when_debug: true 265 | 266 | # use "scheduling: false" to disable scheduling 267 | # scheduling: interval or int 268 | 269 | # to activate the scheduler: 270 | # * uncomment "scheduling" section 271 | # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" 272 | # to your uwsgi.ini 273 | 274 | # scheduling: 275 | # start_after: [300, 1800] # delay to start the first run of the checker 276 | # every: [86400, 90000] # how often the checker runs 277 | 278 | # additional tests: only for the YAML anchors (see the engines section) 279 | # 280 | additional_tests: 281 | rosebud: &test_rosebud 282 | matrix: 283 | query: rosebud 284 | lang: en 285 | result_container: 286 | - not_empty 287 | - ['one_title_contains', 'citizen kane'] 288 | test: 289 | - unique_results 290 | 291 | android: &test_android 292 | matrix: 293 | query: ['android'] 294 | lang: ['en', 'de', 'fr', 'zh-CN'] 295 | result_container: 296 | - not_empty 297 | - ['one_title_contains', 'google'] 298 | test: 299 | - unique_results 300 | 301 | # tests: only for the YAML anchors (see the engines section) 302 | tests: 303 | infobox: &tests_infobox 304 | infobox: 305 | matrix: 306 | query: ["linux", "new york", "bbc"] 307 | result_container: 308 | - has_infobox 309 | 310 | categories_as_tabs: 311 | general: 312 | images: 313 | videos: 314 | news: 315 | map: 316 | music: 317 | it: 318 | science: 319 | files: 320 | social media: 321 | 322 | engines: 323 | - name: 9gag 324 | engine: 9gag 325 | shortcut: 9g 326 | disabled: true 327 | 328 | - name: annas archive 329 | engine: annas_archive 330 | disabled: true 331 | shortcut: aa 332 | 333 | # - name: annas articles 334 | # engine: annas_archive 335 | # shortcut: aaa 336 | # # https://docs.searxng.org/dev/engines/online/annas_archive.html 337 | # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic 338 | # aa_ext: 'pdf' # pdf, epub, .. 
339 | # aa_sort: 'oldest' # newest, oldest, largest, smallest 340 | 341 | - name: apk mirror 342 | engine: apkmirror 343 | timeout: 4.0 344 | shortcut: apkm 345 | disabled: true 346 | 347 | - name: apple app store 348 | engine: apple_app_store 349 | shortcut: aps 350 | disabled: true 351 | 352 | # Requires Tor 353 | - name: ahmia 354 | engine: ahmia 355 | categories: onions 356 | enable_http: true 357 | shortcut: ah 358 | 359 | - name: anaconda 360 | engine: xpath 361 | paging: true 362 | first_page_num: 0 363 | search_url: https://anaconda.org/search?q={query}&page={pageno} 364 | results_xpath: //tbody/tr 365 | url_xpath: ./td/h5/a[last()]/@href 366 | title_xpath: ./td/h5 367 | content_xpath: ./td[h5]/text() 368 | categories: it 369 | timeout: 6.0 370 | shortcut: conda 371 | disabled: true 372 | 373 | - name: arch linux wiki 374 | engine: archlinux 375 | shortcut: al 376 | 377 | - name: artic 378 | engine: artic 379 | shortcut: arc 380 | timeout: 4.0 381 | 382 | - name: arxiv 383 | engine: arxiv 384 | shortcut: arx 385 | timeout: 4.0 386 | 387 | - name: ask 388 | engine: ask 389 | shortcut: ask 390 | disabled: true 391 | 392 | # tmp suspended: dh key too small 393 | # - name: base 394 | # engine: base 395 | # shortcut: bs 396 | 397 | - name: bandcamp 398 | engine: bandcamp 399 | shortcut: bc 400 | categories: music 401 | 402 | - name: wikipedia 403 | engine: wikipedia 404 | shortcut: wp 405 | # add "list" to the array to get results in the results list 406 | display_type: ["infobox"] 407 | base_url: 'https://{language}.wikipedia.org/' 408 | categories: [general] 409 | 410 | - name: bilibili 411 | engine: bilibili 412 | shortcut: bil 413 | disabled: true 414 | 415 | - name: bing 416 | engine: bing 417 | shortcut: bi 418 | disabled: true 419 | 420 | - name: bing images 421 | engine: bing_images 422 | shortcut: bii 423 | 424 | - name: bing news 425 | engine: bing_news 426 | shortcut: bin 427 | 428 | - name: bing videos 429 | engine: bing_videos 430 | shortcut: biv 431
| 432 | - name: bitbucket 433 | engine: xpath 434 | paging: true 435 | search_url: https://bitbucket.org/repo/all/{pageno}?name={query} 436 | url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href 437 | title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] 438 | content_xpath: //article[@class="repo-summary"]/p 439 | categories: [it, repos] 440 | timeout: 4.0 441 | disabled: true 442 | shortcut: bb 443 | about: 444 | website: https://bitbucket.org/ 445 | wikidata_id: Q2493781 446 | official_api_documentation: https://developer.atlassian.com/bitbucket 447 | use_official_api: false 448 | require_api_key: false 449 | results: HTML 450 | 451 | - name: bpb 452 | engine: bpb 453 | shortcut: bpb 454 | disabled: true 455 | 456 | - name: btdigg 457 | engine: btdigg 458 | shortcut: bt 459 | disabled: true 460 | 461 | - name: openverse 462 | engine: openverse 463 | categories: images 464 | shortcut: opv 465 | 466 | - name: media.ccc.de 467 | engine: ccc_media 468 | shortcut: c3tv 469 | # We don't set language: de here because media.ccc.de is not just 470 | # for a German audience. It contains many English videos and many 471 | # German videos have English subtitles. 
472 | disabled: true 473 | 474 | - name: chefkoch 475 | engine: chefkoch 476 | shortcut: chef 477 | # to show premium or plus results too: 478 | # skip_premium: false 479 | 480 | # - name: core.ac.uk 481 | # engine: core 482 | # categories: science 483 | # shortcut: cor 484 | # # get your API key from: https://core.ac.uk/api-keys/register/ 485 | # api_key: 'unset' 486 | 487 | - name: cppreference 488 | engine: cppreference 489 | shortcut: cpp 490 | paging: false 491 | disabled: true 492 | 493 | - name: crossref 494 | engine: crossref 495 | shortcut: cr 496 | timeout: 30 497 | disabled: true 498 | 499 | - name: crowdview 500 | engine: json_engine 501 | shortcut: cv 502 | categories: general 503 | paging: false 504 | search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} 505 | results_query: results 506 | url_query: link 507 | title_query: title 508 | content_query: snippet 509 | disabled: true 510 | about: 511 | website: https://crowdview.ai/ 512 | 513 | - name: yep 514 | engine: yep 515 | shortcut: yep 516 | categories: general 517 | search_type: web 518 | timeout: 5 519 | disabled: true 520 | 521 | - name: yep images 522 | engine: yep 523 | shortcut: yepi 524 | categories: images 525 | search_type: images 526 | disabled: true 527 | 528 | - name: yep news 529 | engine: yep 530 | shortcut: yepn 531 | categories: news 532 | search_type: news 533 | disabled: true 534 | 535 | - name: curlie 536 | engine: xpath 537 | shortcut: cl 538 | categories: general 539 | disabled: true 540 | paging: true 541 | lang_all: '' 542 | search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 543 | page_size: 20 544 | results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] 545 | url_xpath: ./div[@class="title-and-desc"]/a/@href 546 | title_xpath: ./div[@class="title-and-desc"]/a/div 547 | content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] 548 | about: 549 | website: https://curlie.org/ 550 | 
wikidata_id: Q60715723 551 | use_official_api: false 552 | require_api_key: false 553 | results: HTML 554 | 555 | - name: currency 556 | engine: currency_convert 557 | categories: general 558 | shortcut: cc 559 | 560 | - name: bahnhof 561 | engine: json_engine 562 | search_url: https://www.bahnhof.de/api/stations/search/{query} 563 | url_prefix: https://www.bahnhof.de/ 564 | url_query: slug 565 | title_query: name 566 | content_query: state 567 | shortcut: bf 568 | disabled: true 569 | about: 570 | website: https://www.bahn.de 571 | wikidata_id: Q22811603 572 | use_official_api: false 573 | require_api_key: false 574 | results: JSON 575 | language: de 576 | tests: 577 | bahnhof: 578 | matrix: 579 | query: berlin 580 | lang: en 581 | result_container: 582 | - not_empty 583 | - ['one_title_contains', 'Berlin Hauptbahnhof'] 584 | test: 585 | - unique_results 586 | 587 | - name: deezer 588 | engine: deezer 589 | shortcut: dz 590 | disabled: true 591 | 592 | - name: destatis 593 | engine: destatis 594 | shortcut: destat 595 | disabled: true 596 | 597 | - name: deviantart 598 | engine: deviantart 599 | shortcut: da 600 | timeout: 3.0 601 | 602 | - name: ddg definitions 603 | engine: duckduckgo_definitions 604 | shortcut: ddd 605 | weight: 2 606 | disabled: true 607 | tests: *tests_infobox 608 | 609 | # cloudflare protected 610 | # - name: digbt 611 | # engine: digbt 612 | # shortcut: dbt 613 | # timeout: 6.0 614 | # disabled: true 615 | 616 | - name: docker hub 617 | engine: docker_hub 618 | shortcut: dh 619 | categories: [it, packages] 620 | 621 | - name: erowid 622 | engine: xpath 623 | paging: true 624 | first_page_num: 0 625 | page_size: 30 626 | search_url: https://www.erowid.org/search.php?q={query}&s={pageno} 627 | url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href 628 | title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() 629 | content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] 630 | categories: [] 
631 | shortcut: ew 632 | disabled: true 633 | about: 634 | website: https://www.erowid.org/ 635 | wikidata_id: Q1430691 636 | official_api_documentation: 637 | use_official_api: false 638 | require_api_key: false 639 | results: HTML 640 | 641 | # - name: elasticsearch 642 | # shortcut: es 643 | # engine: elasticsearch 644 | # base_url: http://localhost:9200 645 | # username: elastic 646 | # password: changeme 647 | # index: my-index 648 | # # available options: match, simple_query_string, term, terms, custom 649 | # query_type: match 650 | # # if query_type is set to custom, provide your query here 651 | # #custom_query_json: {"query":{"match_all": {}}} 652 | # #show_metadata: false 653 | # disabled: true 654 | 655 | - name: wikidata 656 | engine: wikidata 657 | shortcut: wd 658 | timeout: 3.0 659 | weight: 2 660 | # add "list" to the array to get results in the results list 661 | display_type: ["infobox"] 662 | tests: *tests_infobox 663 | categories: [general] 664 | 665 | - name: duckduckgo 666 | engine: duckduckgo 667 | shortcut: ddg 668 | 669 | - name: duckduckgo images 670 | engine: duckduckgo_extra 671 | categories: [images, web] 672 | ddg_category: images 673 | shortcut: ddi 674 | disabled: true 675 | 676 | - name: duckduckgo videos 677 | engine: duckduckgo_extra 678 | categories: [videos, web] 679 | ddg_category: videos 680 | shortcut: ddv 681 | disabled: true 682 | 683 | - name: duckduckgo news 684 | engine: duckduckgo_extra 685 | categories: [news, web] 686 | ddg_category: news 687 | shortcut: ddn 688 | disabled: true 689 | 690 | - name: duckduckgo weather 691 | engine: duckduckgo_weather 692 | shortcut: ddw 693 | disabled: true 694 | 695 | - name: apple maps 696 | engine: apple_maps 697 | shortcut: apm 698 | disabled: true 699 | timeout: 5.0 700 | 701 | - name: emojipedia 702 | engine: emojipedia 703 | timeout: 4.0 704 | shortcut: em 705 | disabled: true 706 | 707 | - name: tineye 708 | engine: tineye 709 | shortcut: tin 710 | timeout: 9.0 711 | disabled: 
true 712 | 713 | - name: etymonline 714 | engine: xpath 715 | paging: true 716 | search_url: https://etymonline.com/search?page={pageno}&q={query} 717 | url_xpath: //a[contains(@class, "word__name--")]/@href 718 | title_xpath: //a[contains(@class, "word__name--")] 719 | content_xpath: //section[contains(@class, "word__defination")] 720 | first_page_num: 1 721 | shortcut: et 722 | categories: [dictionaries] 723 | about: 724 | website: https://www.etymonline.com/ 725 | wikidata_id: Q1188617 726 | official_api_documentation: 727 | use_official_api: false 728 | require_api_key: false 729 | results: HTML 730 | 731 | # - name: ebay 732 | # engine: ebay 733 | # shortcut: eb 734 | # base_url: 'https://www.ebay.com' 735 | # disabled: true 736 | # timeout: 5 737 | 738 | - name: 1x 739 | engine: www1x 740 | shortcut: 1x 741 | timeout: 3.0 742 | disabled: true 743 | 744 | - name: fdroid 745 | engine: fdroid 746 | shortcut: fd 747 | disabled: true 748 | 749 | - name: findthatmeme 750 | engine: findthatmeme 751 | shortcut: ftm 752 | disabled: true 753 | 754 | - name: flickr 755 | categories: images 756 | shortcut: fl 757 | # You can use the engine using the official stable API, but you need an API 758 | # key, see: https://www.flickr.com/services/apps/create/ 759 | # engine: flickr 760 | # api_key: 'apikey' # required! 
761 | # Or you can use the html non-stable engine, activated by default 762 | engine: flickr_noapi 763 | 764 | - name: free software directory 765 | engine: mediawiki 766 | shortcut: fsd 767 | categories: [it, software wikis] 768 | base_url: https://directory.fsf.org/ 769 | search_type: title 770 | timeout: 5.0 771 | disabled: true 772 | about: 773 | website: https://directory.fsf.org/ 774 | wikidata_id: Q2470288 775 | 776 | # - name: freesound 777 | # engine: freesound 778 | # shortcut: fnd 779 | # disabled: true 780 | # timeout: 15.0 781 | # API key required, see: https://freesound.org/docs/api/overview.html 782 | # api_key: MyAPIkey 783 | 784 | - name: frinkiac 785 | engine: frinkiac 786 | shortcut: frk 787 | disabled: true 788 | 789 | - name: fyyd 790 | engine: fyyd 791 | shortcut: fy 792 | timeout: 8.0 793 | disabled: true 794 | 795 | - name: genius 796 | engine: genius 797 | shortcut: gen 798 | 799 | - name: gentoo 800 | engine: mediawiki 801 | shortcut: ge 802 | categories: ["it", "software wikis"] 803 | base_url: "https://wiki.gentoo.org/" 804 | api_path: "api.php" 805 | search_type: text 806 | timeout: 10 807 | 808 | - name: gitlab 809 | engine: json_engine 810 | paging: true 811 | search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} 812 | url_query: web_url 813 | title_query: name_with_namespace 814 | content_query: description 815 | page_size: 20 816 | categories: [it, repos] 817 | shortcut: gl 818 | timeout: 10.0 819 | disabled: true 820 | about: 821 | website: https://about.gitlab.com/ 822 | wikidata_id: Q16639197 823 | official_api_documentation: https://docs.gitlab.com/ee/api/ 824 | use_official_api: false 825 | require_api_key: false 826 | results: JSON 827 | 828 | - name: github 829 | engine: github 830 | shortcut: gh 831 | 832 | - name: codeberg 833 | # https://docs.searxng.org/dev/engines/online/gitea.html 834 | engine: gitea 835 | base_url: https://codeberg.org 836 | shortcut: cb 837 | disabled: true 838 | 839 | - name: 
gitea.com 840 | engine: gitea 841 | base_url: https://gitea.com 842 | shortcut: gitea 843 | disabled: true 844 | 845 | - name: goodreads 846 | engine: goodreads 847 | shortcut: good 848 | timeout: 4.0 849 | disabled: true 850 | 851 | - name: google 852 | engine: google 853 | shortcut: go 854 | # additional_tests: 855 | # android: *test_android 856 | 857 | - name: google images 858 | engine: google_images 859 | shortcut: goi 860 | # additional_tests: 861 | # android: *test_android 862 | # dali: 863 | # matrix: 864 | # query: ['Dali Christ'] 865 | # lang: ['en', 'de', 'fr', 'zh-CN'] 866 | # result_container: 867 | # - ['one_title_contains', 'Salvador'] 868 | 869 | - name: google news 870 | engine: google_news 871 | shortcut: gon 872 | # additional_tests: 873 | # android: *test_android 874 | 875 | - name: google videos 876 | engine: google_videos 877 | shortcut: gov 878 | # additional_tests: 879 | # android: *test_android 880 | 881 | - name: google scholar 882 | engine: google_scholar 883 | shortcut: gos 884 | 885 | - name: google play apps 886 | engine: google_play 887 | categories: [files, apps] 888 | shortcut: gpa 889 | play_categ: apps 890 | disabled: true 891 | 892 | - name: google play movies 893 | engine: google_play 894 | categories: videos 895 | shortcut: gpm 896 | play_categ: movies 897 | disabled: true 898 | 899 | - name: material icons 900 | engine: material_icons 901 | categories: images 902 | shortcut: mi 903 | disabled: true 904 | 905 | - name: gpodder 906 | engine: json_engine 907 | shortcut: gpod 908 | timeout: 4.0 909 | paging: false 910 | search_url: https://gpodder.net/search.json?q={query} 911 | url_query: url 912 | title_query: title 913 | content_query: description 914 | page_size: 19 915 | categories: music 916 | disabled: true 917 | about: 918 | website: https://gpodder.net 919 | wikidata_id: Q3093354 920 | official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ 921 | use_official_api: false 922 | requires_api_key: false 
923 | results: JSON 924 | 925 | - name: habrahabr 926 | engine: xpath 927 | paging: true 928 | search_url: https://habr.com/en/search/page{pageno}/?q={query} 929 | results_xpath: //article[contains(@class, "tm-articles-list__item")] 930 | url_xpath: .//a[@class="tm-title__link"]/@href 931 | title_xpath: .//a[@class="tm-title__link"] 932 | content_xpath: .//div[contains(@class, "article-formatted-body")] 933 | categories: it 934 | timeout: 4.0 935 | disabled: true 936 | shortcut: habr 937 | about: 938 | website: https://habr.com/ 939 | wikidata_id: Q4494434 940 | official_api_documentation: https://habr.com/en/docs/help/api/ 941 | use_official_api: false 942 | require_api_key: false 943 | results: HTML 944 | 945 | - name: hackernews 946 | engine: hackernews 947 | shortcut: hn 948 | disabled: true 949 | 950 | - name: hex 951 | engine: hex 952 | shortcut: hex 953 | disabled: true 954 | # Valid values: name inserted_at updated_at total_downloads recent_downloads 955 | sort_criteria: "recent_downloads" 956 | page_size: 10 957 | 958 | - name: crates.io 959 | engine: crates 960 | shortcut: crates 961 | disabled: true 962 | timeout: 6.0 963 | 964 | - name: hoogle 965 | engine: xpath 966 | search_url: https://hoogle.haskell.org/?hoogle={query} 967 | results_xpath: '//div[@class="result"]' 968 | title_xpath: './/div[@class="ans"]//a' 969 | url_xpath: './/div[@class="ans"]//a/@href' 970 | content_xpath: './/div[@class="from"]' 971 | page_size: 20 972 | categories: [it, packages] 973 | shortcut: ho 974 | about: 975 | website: https://hoogle.haskell.org/ 976 | wikidata_id: Q34010 977 | official_api_documentation: https://hackage.haskell.org/api 978 | use_official_api: false 979 | require_api_key: false 980 | results: JSON 981 | 982 | - name: imdb 983 | engine: imdb 984 | shortcut: imdb 985 | timeout: 6.0 986 | disabled: true 987 | 988 | - name: imgur 989 | engine: imgur 990 | shortcut: img 991 | disabled: true 992 | 993 | - name: ina 994 | engine: ina 995 | shortcut: in 996 | 
timeout: 6.0 997 | disabled: true 998 | 999 | - name: invidious 1000 | engine: invidious 1001 | # Instances will be selected randomly, see https://api.invidious.io/ for 1002 | # instances that are stable (good uptime) and close to you. 1003 | base_url: 1004 | - https://invidious.io.lol 1005 | - https://invidious.fdn.fr 1006 | - https://yt.artemislena.eu 1007 | - https://invidious.tiekoetter.com 1008 | - https://invidious.flokinet.to 1009 | - https://vid.puffyan.us 1010 | - https://invidious.privacydev.net 1011 | - https://inv.tux.pizza 1012 | shortcut: iv 1013 | timeout: 3.0 1014 | disabled: true 1015 | 1016 | - name: jisho 1017 | engine: jisho 1018 | shortcut: js 1019 | timeout: 3.0 1020 | disabled: true 1021 | 1022 | - name: kickass 1023 | engine: kickass 1024 | base_url: 1025 | - https://kickasstorrents.to 1026 | - https://kickasstorrents.cr 1027 | - https://kickasstorrent.cr 1028 | - https://kickass.sx 1029 | - https://kat.am 1030 | shortcut: kc 1031 | timeout: 4.0 1032 | 1033 | - name: lemmy communities 1034 | engine: lemmy 1035 | lemmy_type: Communities 1036 | shortcut: leco 1037 | 1038 | - name: lemmy users 1039 | engine: lemmy 1040 | network: lemmy communities 1041 | lemmy_type: Users 1042 | shortcut: leus 1043 | 1044 | - name: lemmy posts 1045 | engine: lemmy 1046 | network: lemmy communities 1047 | lemmy_type: Posts 1048 | shortcut: lepo 1049 | 1050 | - name: lemmy comments 1051 | engine: lemmy 1052 | network: lemmy communities 1053 | lemmy_type: Comments 1054 | shortcut: lecom 1055 | 1056 | - name: library genesis 1057 | engine: xpath 1058 | # search_url: https://libgen.is/search.php?req={query} 1059 | search_url: https://libgen.rs/search.php?req={query} 1060 | url_xpath: //a[contains(@href,"book/index.php?md5")]/@href 1061 | title_xpath: //a[contains(@href,"book/")]/text()[1] 1062 | content_xpath: //td/a[1][contains(@href,"=author")]/text() 1063 | categories: files 1064 | timeout: 7.0 1065 | disabled: true 1066 | shortcut: lg 1067 | about: 1068 | 
website: https://libgen.fun/ 1069 | wikidata_id: Q22017206 1070 | official_api_documentation: 1071 | use_official_api: false 1072 | require_api_key: false 1073 | results: HTML 1074 | 1075 | - name: z-library 1076 | engine: zlibrary 1077 | shortcut: zlib 1078 | categories: files 1079 | timeout: 7.0 1080 | 1081 | - name: library of congress 1082 | engine: loc 1083 | shortcut: loc 1084 | categories: images 1085 | 1086 | - name: libretranslate 1087 | engine: libretranslate 1088 | # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors 1089 | base_url: 1090 | - https://translate.terraprint.co 1091 | - https://trans.zillyhuhn.com 1092 | # api_key: abc123 1093 | shortcut: lt 1094 | disabled: true 1095 | 1096 | - name: lingva 1097 | engine: lingva 1098 | shortcut: lv 1099 | # set lingva instance in url, by default it will use the official instance 1100 | # url: https://lingva.thedaviddelta.com 1101 | 1102 | - name: lobste.rs 1103 | engine: xpath 1104 | search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance 1105 | results_xpath: //li[contains(@class, "story")] 1106 | url_xpath: .//a[@class="u-url"]/@href 1107 | title_xpath: .//a[@class="u-url"] 1108 | content_xpath: .//a[@class="domain"] 1109 | categories: it 1110 | shortcut: lo 1111 | timeout: 5.0 1112 | disabled: true 1113 | about: 1114 | website: https://lobste.rs/ 1115 | wikidata_id: Q60762874 1116 | official_api_documentation: 1117 | use_official_api: false 1118 | require_api_key: false 1119 | results: HTML 1120 | 1121 | - name: mastodon users 1122 | engine: mastodon 1123 | mastodon_type: accounts 1124 | base_url: https://mastodon.social 1125 | shortcut: mau 1126 | 1127 | - name: mastodon hashtags 1128 | engine: mastodon 1129 | mastodon_type: hashtags 1130 | base_url: https://mastodon.social 1131 | shortcut: mah 1132 | 1133 | # - name: matrixrooms 1134 | # engine: mrs 1135 | # # https://docs.searxng.org/dev/engines/online/mrs.html 1136 | # # base_url: https://mrs-api-host 1137 
| # shortcut: mtrx 1138 | # disabled: true 1139 | 1140 | - name: mdn 1141 | shortcut: mdn 1142 | engine: json_engine 1143 | categories: [it] 1144 | paging: true 1145 | search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} 1146 | results_query: documents 1147 | url_query: mdn_url 1148 | url_prefix: https://developer.mozilla.org 1149 | title_query: title 1150 | content_query: summary 1151 | about: 1152 | website: https://developer.mozilla.org 1153 | wikidata_id: Q3273508 1154 | official_api_documentation: null 1155 | use_official_api: false 1156 | require_api_key: false 1157 | results: JSON 1158 | 1159 | - name: metacpan 1160 | engine: metacpan 1161 | shortcut: cpan 1162 | disabled: true 1163 | number_of_results: 20 1164 | 1165 | # - name: meilisearch 1166 | # engine: meilisearch 1167 | # shortcut: mes 1168 | # enable_http: true 1169 | # base_url: http://localhost:7700 1170 | # index: my-index 1171 | 1172 | - name: mixcloud 1173 | engine: mixcloud 1174 | shortcut: mc 1175 | 1176 | # MongoDB engine 1177 | # Required dependency: pymongo 1178 | # - name: mymongo 1179 | # engine: mongodb 1180 | # shortcut: md 1181 | # exact_match_only: false 1182 | # host: '127.0.0.1' 1183 | # port: 27017 1184 | # enable_http: true 1185 | # results_per_page: 20 1186 | # database: 'business' 1187 | # collection: 'reviews' # name of the db collection 1188 | # key: 'name' # key in the collection to search for 1189 | 1190 | - name: mozhi 1191 | engine: mozhi 1192 | base_url: 1193 | - https://mozhi.aryak.me 1194 | - https://translate.bus-hit.me 1195 | - https://nyc1.mz.ggtyler.dev 1196 | # mozhi_engine: google - see https://mozhi.aryak.me for supported engines 1197 | timeout: 4.0 1198 | shortcut: mz 1199 | disabled: true 1200 | 1201 | - name: mwmbl 1202 | engine: mwmbl 1203 | # api_url: https://api.mwmbl.org 1204 | shortcut: mwm 1205 | disabled: true 1206 | 1207 | - name: npm 1208 | engine: npm 1209 | shortcut: npm 1210 | timeout: 5.0 1211 | disabled: true 1212 | 
1213 | - name: nyaa 1214 | engine: nyaa 1215 | shortcut: nt 1216 | disabled: true 1217 | 1218 | - name: mankier 1219 | engine: json_engine 1220 | search_url: https://www.mankier.com/api/v2/mans/?q={query} 1221 | results_query: results 1222 | url_query: url 1223 | title_query: name 1224 | content_query: description 1225 | categories: it 1226 | shortcut: man 1227 | about: 1228 | website: https://www.mankier.com/ 1229 | official_api_documentation: https://www.mankier.com/api 1230 | use_official_api: true 1231 | require_api_key: false 1232 | results: JSON 1233 | 1234 | # read https://docs.searxng.org/dev/engines/online/mullvad_leta.html 1235 | # - name: mullvadleta 1236 | # engine: mullvad_leta 1237 | # use_cache: true # Only 100 non-cache searches per day, suggested only for private instances 1238 | # search_url: https://leta.mullvad.net 1239 | # categories: [general, web] 1240 | # shortcut: ml 1241 | 1242 | - name: odysee 1243 | engine: odysee 1244 | shortcut: od 1245 | disabled: true 1246 | 1247 | - name: openairedatasets 1248 | engine: json_engine 1249 | paging: true 1250 | search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} 1251 | results_query: response/results/result 1252 | url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ 1253 | title_query: metadata/oaf:entity/oaf:result/title/$ 1254 | content_query: metadata/oaf:entity/oaf:result/description/$ 1255 | content_html_to_text: true 1256 | categories: "science" 1257 | shortcut: oad 1258 | timeout: 5.0 1259 | about: 1260 | website: https://www.openaire.eu/ 1261 | wikidata_id: Q25106053 1262 | official_api_documentation: https://api.openaire.eu/ 1263 | use_official_api: false 1264 | require_api_key: false 1265 | results: JSON 1266 | 1267 | - name: openairepublications 1268 | engine: json_engine 1269 | paging: true 1270 | search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} 1271 | 
results_query: response/results/result 1272 | url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ 1273 | title_query: metadata/oaf:entity/oaf:result/title/$ 1274 | content_query: metadata/oaf:entity/oaf:result/description/$ 1275 | content_html_to_text: true 1276 | categories: science 1277 | shortcut: oap 1278 | timeout: 5.0 1279 | about: 1280 | website: https://www.openaire.eu/ 1281 | wikidata_id: Q25106053 1282 | official_api_documentation: https://api.openaire.eu/ 1283 | use_official_api: false 1284 | require_api_key: false 1285 | results: JSON 1286 | 1287 | - name: openmeteo 1288 | engine: open_meteo 1289 | shortcut: om 1290 | disabled: true 1291 | 1292 | # - name: opensemanticsearch 1293 | # engine: opensemantic 1294 | # shortcut: oss 1295 | # base_url: 'http://localhost:8983/solr/opensemanticsearch/' 1296 | 1297 | - name: openstreetmap 1298 | engine: openstreetmap 1299 | shortcut: osm 1300 | 1301 | - name: openrepos 1302 | engine: xpath 1303 | paging: true 1304 | search_url: https://openrepos.net/search/node/{query}?page={pageno} 1305 | url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href 1306 | title_xpath: //li[@class="search-result"]//h3[@class="title"]/a 1307 | content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] 1308 | categories: files 1309 | timeout: 4.0 1310 | disabled: true 1311 | shortcut: or 1312 | about: 1313 | website: https://openrepos.net/ 1314 | wikidata_id: 1315 | official_api_documentation: 1316 | use_official_api: false 1317 | require_api_key: false 1318 | results: HTML 1319 | 1320 | - name: packagist 1321 | engine: json_engine 1322 | paging: true 1323 | search_url: https://packagist.org/search.json?q={query}&page={pageno} 1324 | results_query: results 1325 | url_query: url 1326 | title_query: name 1327 | content_query: description 1328 | categories: [it, packages] 1329 | disabled: true 1330 | timeout: 5.0 1331 | shortcut: pack 1332 | about: 1333 | 
website: https://packagist.org 1334 | wikidata_id: Q108311377 1335 | official_api_documentation: https://packagist.org/apidoc 1336 | use_official_api: true 1337 | require_api_key: false 1338 | results: JSON 1339 | 1340 | - name: pdbe 1341 | engine: pdbe 1342 | shortcut: pdb 1343 | # Hide obsolete PDB entries. Default is not to hide obsolete structures 1344 | # hide_obsolete: false 1345 | 1346 | - name: photon 1347 | engine: photon 1348 | shortcut: ph 1349 | 1350 | - name: pinterest 1351 | engine: pinterest 1352 | shortcut: pin 1353 | 1354 | - name: piped 1355 | engine: piped 1356 | shortcut: ppd 1357 | categories: videos 1358 | piped_filter: videos 1359 | timeout: 3.0 1360 | 1361 | # URL to use as link and for embeds 1362 | frontend_url: https://srv.piped.video 1363 | # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ 1364 | backend_url: 1365 | - https://pipedapi.kavin.rocks 1366 | - https://pipedapi-libre.kavin.rocks 1367 | - https://pipedapi.adminforge.de 1368 | 1369 | - name: piped.music 1370 | engine: piped 1371 | network: piped 1372 | shortcut: ppdm 1373 | categories: music 1374 | piped_filter: music_songs 1375 | timeout: 3.0 1376 | 1377 | - name: piratebay 1378 | engine: piratebay 1379 | shortcut: tpb 1380 | # You may need to change this URL to a proxy if piratebay is blocked in your 1381 | # country 1382 | url: https://thepiratebay.org/ 1383 | timeout: 3.0 1384 | 1385 | - name: pixiv 1386 | shortcut: pv 1387 | engine: pixiv 1388 | disabled: true 1389 | inactive: true 1390 | pixiv_image_proxies: 1391 | - https://pximg.example.org 1392 | # A proxy is required to load the images. Hosting an image proxy server 1393 | # for Pixiv: 1394 | # --> https://pixivfe.pages.dev/hosting-image-proxy-server/ 1395 | # Proxies from public instances. Ask the public instances owners if they 1396 | # agree to receive traffic from SearXNG! 
1397 | # --> https://codeberg.org/VnPower/PixivFE#instances 1398 | # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 1399 | # image proxy of https://pixiv.cat 1400 | # - https://i.pixiv.cat 1401 | # image proxy of https://www.pixiv.pics 1402 | # - https://pximg.cocomi.eu.org 1403 | # image proxy of https://pixivfe.exozy.me 1404 | # - https://pximg.exozy.me 1405 | # image proxy of https://pixivfe.ducks.party 1406 | # - https://pixiv.ducks.party 1407 | # image proxy of https://pixiv.perennialte.ch 1408 | # - https://pximg.perennialte.ch 1409 | 1410 | - name: podcastindex 1411 | engine: podcastindex 1412 | shortcut: podcast 1413 | 1414 | # Required dependency: psycopg2 1415 | # - name: postgresql 1416 | # engine: postgresql 1417 | # database: postgres 1418 | # username: postgres 1419 | # password: postgres 1420 | # limit: 10 1421 | # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' 1422 | # shortcut : psql 1423 | 1424 | - name: presearch 1425 | engine: presearch 1426 | search_type: search 1427 | categories: [general, web] 1428 | shortcut: ps 1429 | timeout: 4.0 1430 | disabled: true 1431 | 1432 | - name: presearch images 1433 | engine: presearch 1434 | network: presearch 1435 | search_type: images 1436 | categories: [images, web] 1437 | timeout: 4.0 1438 | shortcut: psimg 1439 | disabled: true 1440 | 1441 | - name: presearch videos 1442 | engine: presearch 1443 | network: presearch 1444 | search_type: videos 1445 | categories: [general, web] 1446 | timeout: 4.0 1447 | shortcut: psvid 1448 | disabled: true 1449 | 1450 | - name: presearch news 1451 | engine: presearch 1452 | network: presearch 1453 | search_type: news 1454 | categories: [news, web] 1455 | timeout: 4.0 1456 | shortcut: psnews 1457 | disabled: true 1458 | 1459 | - name: pub.dev 1460 | engine: xpath 1461 | shortcut: pd 1462 | search_url: https://pub.dev/packages?q={query}&page={pageno} 1463 | paging: true 1464 | results_xpath: //div[contains(@class,"packages-item")] 
1465 | url_xpath: ./div/h3/a/@href 1466 | title_xpath: ./div/h3/a 1467 | content_xpath: ./div/div/div[contains(@class,"packages-description")]/span 1468 | categories: [packages, it] 1469 | timeout: 3.0 1470 | disabled: true 1471 | first_page_num: 1 1472 | about: 1473 | website: https://pub.dev/ 1474 | official_api_documentation: https://pub.dev/help/api 1475 | use_official_api: false 1476 | require_api_key: false 1477 | results: HTML 1478 | 1479 | - name: pubmed 1480 | engine: pubmed 1481 | shortcut: pub 1482 | timeout: 3.0 1483 | 1484 | - name: pypi 1485 | shortcut: pypi 1486 | engine: pypi 1487 | 1488 | - name: qwant 1489 | qwant_categ: web 1490 | engine: qwant 1491 | shortcut: qw 1492 | categories: [general, web] 1493 | additional_tests: 1494 | rosebud: *test_rosebud 1495 | 1496 | - name: qwant news 1497 | qwant_categ: news 1498 | engine: qwant 1499 | shortcut: qwn 1500 | categories: news 1501 | network: qwant 1502 | 1503 | - name: qwant images 1504 | qwant_categ: images 1505 | engine: qwant 1506 | shortcut: qwi 1507 | categories: [images, web] 1508 | network: qwant 1509 | 1510 | - name: qwant videos 1511 | qwant_categ: videos 1512 | engine: qwant 1513 | shortcut: qwv 1514 | categories: [videos, web] 1515 | network: qwant 1516 | 1517 | # - name: library 1518 | # engine: recoll 1519 | # shortcut: lib 1520 | # base_url: 'https://recoll.example.org/' 1521 | # search_dir: '' 1522 | # mount_prefix: /export 1523 | # dl_prefix: 'https://download.example.org' 1524 | # timeout: 30.0 1525 | # categories: files 1526 | # disabled: true 1527 | 1528 | # - name: recoll library reference 1529 | # engine: recoll 1530 | # base_url: 'https://recoll.example.org/' 1531 | # search_dir: reference 1532 | # mount_prefix: /export 1533 | # dl_prefix: 'https://download.example.org' 1534 | # shortcut: libr 1535 | # timeout: 30.0 1536 | # categories: files 1537 | # disabled: true 1538 | 1539 | - name: radio browser 1540 | engine: radio_browser 1541 | shortcut: rb 1542 | 1543 | - name: reddit 
1544 | engine: reddit 1545 | shortcut: re 1546 | page_size: 25 1547 | disabled: true 1548 | 1549 | - name: rottentomatoes 1550 | engine: rottentomatoes 1551 | shortcut: rt 1552 | disabled: true 1553 | 1554 | # Required dependency: redis 1555 | # - name: myredis 1556 | # shortcut : rds 1557 | # engine: redis_server 1558 | # exact_match_only: false 1559 | # host: '127.0.0.1' 1560 | # port: 6379 1561 | # enable_http: true 1562 | # password: '' 1563 | # db: 0 1564 | 1565 | # tmp suspended: bad certificate 1566 | # - name: scanr structures 1567 | # shortcut: scs 1568 | # engine: scanr_structures 1569 | # disabled: true 1570 | 1571 | - name: searchmysite 1572 | engine: xpath 1573 | shortcut: sms 1574 | categories: general 1575 | paging: true 1576 | search_url: https://searchmysite.net/search/?q={query}&page={pageno} 1577 | results_xpath: //div[contains(@class,'search-result')] 1578 | url_xpath: .//a[contains(@class,'result-link')]/@href 1579 | title_xpath: .//span[contains(@class,'result-title-txt')]/text() 1580 | content_xpath: ./p[@id='result-hightlight'] 1581 | disabled: true 1582 | about: 1583 | website: https://searchmysite.net 1584 | 1585 | - name: sepiasearch 1586 | engine: sepiasearch 1587 | shortcut: sep 1588 | 1589 | - name: soundcloud 1590 | engine: soundcloud 1591 | shortcut: sc 1592 | 1593 | - name: stackoverflow 1594 | engine: stackexchange 1595 | shortcut: st 1596 | api_site: 'stackoverflow' 1597 | categories: [it, q&a] 1598 | 1599 | - name: askubuntu 1600 | engine: stackexchange 1601 | shortcut: ubuntu 1602 | api_site: 'askubuntu' 1603 | categories: [it, q&a] 1604 | 1605 | - name: internetarchivescholar 1606 | engine: internet_archive_scholar 1607 | shortcut: ias 1608 | timeout: 15.0 1609 | 1610 | - name: superuser 1611 | engine: stackexchange 1612 | shortcut: su 1613 | api_site: 'superuser' 1614 | categories: [it, q&a] 1615 | 1616 | - name: discuss.python 1617 | engine: discourse 1618 | shortcut: dpy 1619 | base_url: 'https://discuss.python.org' 1620 | 
categories: [it, q&a] 1621 | disabled: true 1622 | 1623 | - name: caddy.community 1624 | engine: discourse 1625 | shortcut: caddy 1626 | base_url: 'https://caddy.community' 1627 | categories: [it, q&a] 1628 | disabled: true 1629 | 1630 | - name: pi-hole.community 1631 | engine: discourse 1632 | shortcut: pi 1633 | categories: [it, q&a] 1634 | base_url: 'https://discourse.pi-hole.net' 1635 | disabled: true 1636 | 1637 | - name: searchcode code 1638 | engine: searchcode_code 1639 | shortcut: scc 1640 | disabled: true 1641 | 1642 | # - name: searx 1643 | # engine: searx_engine 1644 | # shortcut: se 1645 | # instance_urls : 1646 | # - http://127.0.0.1:8888/ 1647 | # - ... 1648 | # disabled: true 1649 | 1650 | - name: semantic scholar 1651 | engine: semantic_scholar 1652 | disabled: true 1653 | shortcut: se 1654 | 1655 | # Spotify needs API credentials 1656 | # - name: spotify 1657 | # engine: spotify 1658 | # shortcut: stf 1659 | # api_client_id: ******* 1660 | # api_client_secret: ******* 1661 | 1662 | # - name: solr 1663 | # engine: solr 1664 | # shortcut: slr 1665 | # base_url: http://localhost:8983 1666 | # collection: collection_name 1667 | # sort: '' # sorting: asc or desc 1668 | # field_list: '' # comma separated list of field names to display on the UI 1669 | # default_fields: '' # default field to query 1670 | # query_fields: '' # query fields 1671 | # enable_http: true 1672 | 1673 | # - name: springer nature 1674 | # engine: springer 1675 | # # get your API key from: https://dev.springernature.com/signup 1676 | # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" 1677 | # api_key: 'unset' 1678 | # shortcut: springer 1679 | # timeout: 15.0 1680 | 1681 | - name: startpage 1682 | engine: startpage 1683 | shortcut: sp 1684 | timeout: 6.0 1685 | disabled: true 1686 | additional_tests: 1687 | rosebud: *test_rosebud 1688 | 1689 | - name: tokyotoshokan 1690 | engine: tokyotoshokan 1691 | shortcut: tt 1692 | timeout: 6.0 1693 | disabled: true 1694 
| 1695 | - name: solidtorrents 1696 | engine: solidtorrents 1697 | shortcut: solid 1698 | timeout: 4.0 1699 | base_url: 1700 | - https://solidtorrents.to 1701 | - https://bitsearch.to 1702 | 1703 | # For this demo of the sqlite engine download: 1704 | # https://liste.mediathekview.de/filmliste-v2.db.bz2 1705 | # and unpack into searx/data/filmliste-v2.db 1706 | # Query to test: "!demo concert" 1707 | # 1708 | # - name: demo 1709 | # engine: sqlite 1710 | # shortcut: demo 1711 | # categories: general 1712 | # result_template: default.html 1713 | # database: searx/data/filmliste-v2.db 1714 | # query_str: >- 1715 | # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, 1716 | # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, 1717 | # description AS content 1718 | # FROM film 1719 | # WHERE title LIKE :wildcard OR description LIKE :wildcard 1720 | # ORDER BY duration DESC 1721 | 1722 | - name: tagesschau 1723 | engine: tagesschau 1724 | # when set to false, display URLs from Tagesschau, and not the actual source 1725 | # (e.g. NDR, WDR, SWR, HR, ...) 
1726 | use_source_url: true 1727 | shortcut: ts 1728 | disabled: true 1729 | 1730 | - name: tmdb 1731 | engine: xpath 1732 | paging: true 1733 | categories: movies 1734 | search_url: https://www.themoviedb.org/search?page={pageno}&query={query} 1735 | results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] 1736 | url_xpath: .//div[contains(@class,"poster")]/a/@href 1737 | thumbnail_xpath: .//img/@src 1738 | title_xpath: .//div[contains(@class,"title")]//h2 1739 | content_xpath: .//div[contains(@class,"overview")] 1740 | shortcut: tm 1741 | disabled: true 1742 | 1743 | # Requires Tor 1744 | - name: torch 1745 | engine: xpath 1746 | paging: true 1747 | search_url: 1748 | http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and 1749 | results_xpath: //table//tr 1750 | url_xpath: ./td[2]/a 1751 | title_xpath: ./td[2]/b 1752 | content_xpath: ./td[2]/small 1753 | categories: onions 1754 | enable_http: true 1755 | shortcut: tch 1756 | 1757 | # torznab engine lets you query any torznab compatible indexer. Using this 1758 | # engine in combination with Jackett opens the possibility to query a lot of 1759 | # public and private indexers directly from SearXNG. 
More details at: 1760 | # https://docs.searxng.org/dev/engines/online/torznab.html 1761 | # 1762 | # - name: Torznab EZTV 1763 | # engine: torznab 1764 | # shortcut: eztv 1765 | # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab 1766 | # enable_http: true # if using localhost 1767 | # api_key: xxxxxxxxxxxxxxx 1768 | # show_magnet_links: true 1769 | # show_torrent_files: false 1770 | # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories 1771 | # torznab_categories: # optional 1772 | # - 2000 1773 | # - 5000 1774 | 1775 | # tmp suspended - too slow, too many errors 1776 | # - name: urbandictionary 1777 | # engine : xpath 1778 | # search_url : https://www.urbandictionary.com/define.php?term={query} 1779 | # url_xpath : //*[@class="word"]/@href 1780 | # title_xpath : //*[@class="def-header"] 1781 | # content_xpath: //*[@class="meaning"] 1782 | # shortcut: ud 1783 | 1784 | - name: unsplash 1785 | engine: unsplash 1786 | shortcut: us 1787 | 1788 | - name: yandex music 1789 | engine: yandex_music 1790 | shortcut: ydm 1791 | disabled: true 1792 | # https://yandex.com/support/music/access.html 1793 | inactive: true 1794 | 1795 | - name: yahoo 1796 | engine: yahoo 1797 | shortcut: yh 1798 | disabled: true 1799 | 1800 | - name: yahoo news 1801 | engine: yahoo_news 1802 | shortcut: yhn 1803 | 1804 | - name: youtube 1805 | shortcut: yt 1806 | # You can use the engine using the official stable API, but you need an API 1807 | # key. See: https://console.developers.google.com/project 1808 | # 1809 | # engine: youtube_api 1810 | # api_key: 'apikey' # required! 
1811 | # 1812 | # Or you can use the html non-stable engine, activated by default 1813 | engine: youtube_noapi 1814 | 1815 | - name: dailymotion 1816 | engine: dailymotion 1817 | shortcut: dm 1818 | 1819 | - name: vimeo 1820 | engine: vimeo 1821 | shortcut: vm 1822 | 1823 | - name: wiby 1824 | engine: json_engine 1825 | paging: true 1826 | search_url: https://wiby.me/json/?q={query}&p={pageno} 1827 | url_query: URL 1828 | title_query: Title 1829 | content_query: Snippet 1830 | categories: [general, web] 1831 | shortcut: wib 1832 | disabled: true 1833 | about: 1834 | website: https://wiby.me/ 1835 | 1836 | - name: alexandria 1837 | engine: json_engine 1838 | shortcut: alx 1839 | categories: general 1840 | paging: true 1841 | search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} 1842 | results_query: results 1843 | title_query: title 1844 | url_query: url 1845 | content_query: snippet 1846 | timeout: 1.5 1847 | disabled: true 1848 | about: 1849 | website: https://alexandria.org/ 1850 | official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md 1851 | use_official_api: true 1852 | require_api_key: false 1853 | results: JSON 1854 | 1855 | - name: wikibooks 1856 | engine: mediawiki 1857 | weight: 0.5 1858 | shortcut: wb 1859 | categories: [general, wikimedia] 1860 | base_url: "https://{language}.wikibooks.org/" 1861 | search_type: text 1862 | disabled: true 1863 | about: 1864 | website: https://www.wikibooks.org/ 1865 | wikidata_id: Q367 1866 | 1867 | - name: wikinews 1868 | engine: mediawiki 1869 | shortcut: wn 1870 | categories: [news, wikimedia] 1871 | base_url: "https://{language}.wikinews.org/" 1872 | search_type: text 1873 | srsort: create_timestamp_desc 1874 | about: 1875 | website: https://www.wikinews.org/ 1876 | wikidata_id: Q964 1877 | 1878 | - name: wikiquote 1879 | engine: mediawiki 1880 | weight: 0.5 1881 | shortcut: wq 1882 | categories: [general, wikimedia] 1883 | base_url: 
"https://{language}.wikiquote.org/" 1884 | search_type: text 1885 | disabled: true 1886 | additional_tests: 1887 | rosebud: *test_rosebud 1888 | about: 1889 | website: https://www.wikiquote.org/ 1890 | wikidata_id: Q369 1891 | 1892 | - name: wikisource 1893 | engine: mediawiki 1894 | weight: 0.5 1895 | shortcut: ws 1896 | categories: [general, wikimedia] 1897 | base_url: "https://{language}.wikisource.org/" 1898 | search_type: text 1899 | disabled: true 1900 | about: 1901 | website: https://www.wikisource.org/ 1902 | wikidata_id: Q263 1903 | 1904 | - name: wikispecies 1905 | engine: mediawiki 1906 | shortcut: wsp 1907 | categories: [general, science, wikimedia] 1908 | base_url: "https://species.wikimedia.org/" 1909 | search_type: text 1910 | disabled: true 1911 | about: 1912 | website: https://species.wikimedia.org/ 1913 | wikidata_id: Q13679 1914 | tests: 1915 | wikispecies: 1916 | matrix: 1917 | query: "Campbell, L.I. et al. 2011: MicroRNAs" 1918 | lang: en 1919 | result_container: 1920 | - not_empty 1921 | - ['one_title_contains', 'Tardigrada'] 1922 | test: 1923 | - unique_results 1924 | 1925 | - name: wiktionary 1926 | engine: mediawiki 1927 | shortcut: wt 1928 | categories: [dictionaries, wikimedia] 1929 | base_url: "https://{language}.wiktionary.org/" 1930 | search_type: text 1931 | about: 1932 | website: https://www.wiktionary.org/ 1933 | wikidata_id: Q151 1934 | 1935 | - name: wikiversity 1936 | engine: mediawiki 1937 | weight: 0.5 1938 | shortcut: wv 1939 | categories: [general, wikimedia] 1940 | base_url: "https://{language}.wikiversity.org/" 1941 | search_type: text 1942 | disabled: true 1943 | about: 1944 | website: https://www.wikiversity.org/ 1945 | wikidata_id: Q370 1946 | 1947 | - name: wikivoyage 1948 | engine: mediawiki 1949 | weight: 0.5 1950 | shortcut: wy 1951 | categories: [general, wikimedia] 1952 | base_url: "https://{language}.wikivoyage.org/" 1953 | search_type: text 1954 | disabled: true 1955 | about: 1956 | website: 
https://www.wikivoyage.org/ 1957 | wikidata_id: Q373 1958 | 1959 | - name: wikicommons.images 1960 | engine: wikicommons 1961 | shortcut: wc 1962 | categories: images 1963 | search_type: images 1964 | number_of_results: 10 1965 | 1966 | - name: wikicommons.videos 1967 | engine: wikicommons 1968 | shortcut: wcv 1969 | categories: videos 1970 | search_type: videos 1971 | number_of_results: 10 1972 | 1973 | - name: wikicommons.audio 1974 | engine: wikicommons 1975 | shortcut: wca 1976 | categories: music 1977 | search_type: audio 1978 | number_of_results: 10 1979 | 1980 | - name: wikicommons.files 1981 | engine: wikicommons 1982 | shortcut: wcf 1983 | categories: files 1984 | search_type: files 1985 | number_of_results: 10 1986 | 1987 | - name: wolframalpha 1988 | shortcut: wa 1989 | # You can use the engine using the official stable API, but you need an API 1990 | # key. See: https://products.wolframalpha.com/api/ 1991 | # 1992 | # engine: wolframalpha_api 1993 | # api_key: '' 1994 | # 1995 | # Or you can use the html non-stable engine, activated by default 1996 | engine: wolframalpha_noapi 1997 | timeout: 6.0 1998 | categories: general 1999 | disabled: true 2000 | 2001 | - name: dictzone 2002 | engine: dictzone 2003 | shortcut: dc 2004 | 2005 | - name: mymemory translated 2006 | engine: translated 2007 | shortcut: tl 2008 | timeout: 5.0 2009 | # You can use without an API key, but you are limited to 1000 words/day 2010 | # See: https://mymemory.translated.net/doc/usagelimits.php 2011 | # api_key: '' 2012 | 2013 | # Required dependency: mysql-connector-python 2014 | # - name: mysql 2015 | # engine: mysql_server 2016 | # database: mydatabase 2017 | # username: user 2018 | # password: pass 2019 | # limit: 10 2020 | # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' 2021 | # shortcut: mysql 2022 | 2023 | - name: 1337x 2024 | engine: 1337x 2025 | shortcut: 1337x 2026 | disabled: true 2027 | 2028 | - name: duden 2029 | engine: duden 2030 | shortcut: du 2031 | 
disabled: true 2032 | 2033 | - name: seznam 2034 | shortcut: szn 2035 | engine: seznam 2036 | disabled: true 2037 | 2038 | # - name: deepl 2039 | # engine: deepl 2040 | # shortcut: dpl 2041 | # # You can use the engine using the official stable API, but you need an API key 2042 | # # See: https://www.deepl.com/pro-api?cta=header-pro-api 2043 | # api_key: '' # required! 2044 | # timeout: 5.0 2045 | # disabled: true 2046 | 2047 | - name: mojeek 2048 | shortcut: mjk 2049 | engine: mojeek 2050 | categories: [general, web] 2051 | disabled: true 2052 | 2053 | - name: mojeek images 2054 | shortcut: mjkimg 2055 | engine: mojeek 2056 | categories: [images, web] 2057 | search_type: images 2058 | paging: false 2059 | disabled: true 2060 | 2061 | - name: mojeek news 2062 | shortcut: mjknews 2063 | engine: mojeek 2064 | categories: [news, web] 2065 | search_type: news 2066 | paging: false 2067 | disabled: true 2068 | 2069 | - name: moviepilot 2070 | engine: moviepilot 2071 | shortcut: mp 2072 | disabled: true 2073 | 2074 | - name: naver 2075 | shortcut: nvr 2076 | categories: [general, web] 2077 | engine: xpath 2078 | paging: true 2079 | search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} 2080 | url_xpath: //a[@class="link_tit"]/@href 2081 | title_xpath: //a[@class="link_tit"] 2082 | content_xpath: //div[@class="total_dsc_wrap"]/a 2083 | first_page_num: 1 2084 | page_size: 10 2085 | disabled: true 2086 | about: 2087 | website: https://www.naver.com/ 2088 | wikidata_id: Q485639 2089 | official_api_documentation: https://developers.naver.com/docs/nmt/examples/ 2090 | use_official_api: false 2091 | require_api_key: false 2092 | results: HTML 2093 | language: ko 2094 | 2095 | - name: rubygems 2096 | shortcut: rbg 2097 | engine: xpath 2098 | paging: true 2099 | search_url: https://rubygems.org/search?page={pageno}&query={query} 2100 | results_xpath: /html/body/main/div/a[@class="gems__gem"] 2101 | url_xpath: ./@href 2102 | 
title_xpath: ./span/h2 2103 | content_xpath: ./span/p 2104 | suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a 2105 | first_page_num: 1 2106 | categories: [it, packages] 2107 | disabled: true 2108 | about: 2109 | website: https://rubygems.org/ 2110 | wikidata_id: Q1853420 2111 | official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ 2112 | use_official_api: false 2113 | require_api_key: false 2114 | results: HTML 2115 | 2116 | - name: peertube 2117 | engine: peertube 2118 | shortcut: ptb 2119 | paging: true 2120 | # alternatives see: https://instances.joinpeertube.org/instances 2121 | # base_url: https://tube.4aem.com 2122 | categories: videos 2123 | disabled: true 2124 | timeout: 6.0 2125 | 2126 | - name: mediathekviewweb 2127 | engine: mediathekviewweb 2128 | shortcut: mvw 2129 | disabled: true 2130 | 2131 | - name: yacy 2132 | engine: yacy 2133 | categories: general 2134 | search_type: text 2135 | base_url: 2136 | - https://yacy.searchlab.eu 2137 | - https://search.lomig.me 2138 | - https://yacy.ecosys.eu 2139 | - https://search.webproject.link 2140 | shortcut: ya 2141 | disabled: true 2142 | # required if you aren't using HTTPS for your local yacy instance 2143 | # https://docs.searxng.org/dev/engines/online/yacy.html 2144 | # enable_http: true 2145 | # timeout: 3.0 2146 | # search_mode: 'global' 2147 | 2148 | - name: yacy images 2149 | engine: yacy 2150 | categories: images 2151 | search_type: image 2152 | shortcut: yai 2153 | disabled: true 2154 | 2155 | - name: rumble 2156 | engine: rumble 2157 | shortcut: ru 2158 | base_url: https://rumble.com/ 2159 | paging: true 2160 | categories: videos 2161 | disabled: true 2162 | 2163 | - name: livespace 2164 | engine: livespace 2165 | shortcut: ls 2166 | categories: videos 2167 | disabled: true 2168 | timeout: 5.0 2169 | 2170 | - name: wordnik 2171 | engine: wordnik 2172 | shortcut: def 2173 | base_url: https://www.wordnik.com/ 2174 | categories: [dictionaries] 2175 | 
timeout: 5.0 2176 | 2177 | - name: woxikon.de synonyme 2178 | engine: xpath 2179 | shortcut: woxi 2180 | categories: [dictionaries] 2181 | timeout: 5.0 2182 | disabled: true 2183 | search_url: https://synonyme.woxikon.de/synonyme/{query}.php 2184 | url_xpath: //div[@class="upper-synonyms"]/a/@href 2185 | content_xpath: //div[@class="synonyms-list-group"] 2186 | title_xpath: //div[@class="upper-synonyms"]/a 2187 | no_result_for_http_status: [404] 2188 | about: 2189 | website: https://www.woxikon.de/ 2190 | wikidata_id: # No Wikidata ID 2191 | use_official_api: false 2192 | require_api_key: false 2193 | results: HTML 2194 | language: de 2195 | 2196 | - name: seekr news 2197 | engine: seekr 2198 | shortcut: senews 2199 | categories: news 2200 | seekr_category: news 2201 | disabled: true 2202 | 2203 | - name: seekr images 2204 | engine: seekr 2205 | network: seekr news 2206 | shortcut: seimg 2207 | categories: images 2208 | seekr_category: images 2209 | disabled: true 2210 | 2211 | - name: seekr videos 2212 | engine: seekr 2213 | network: seekr news 2214 | shortcut: sevid 2215 | categories: videos 2216 | seekr_category: videos 2217 | disabled: true 2218 | 2219 | - name: sjp.pwn 2220 | engine: sjp 2221 | shortcut: sjp 2222 | base_url: https://sjp.pwn.pl/ 2223 | timeout: 5.0 2224 | disabled: true 2225 | 2226 | - name: stract 2227 | engine: stract 2228 | shortcut: str 2229 | disabled: true 2230 | 2231 | - name: svgrepo 2232 | engine: svgrepo 2233 | shortcut: svg 2234 | timeout: 10.0 2235 | disabled: true 2236 | 2237 | - name: tootfinder 2238 | engine: tootfinder 2239 | shortcut: toot 2240 | 2241 | - name: voidlinux 2242 | engine: voidlinux 2243 | shortcut: void 2244 | disabled: true 2245 | 2246 | - name: wallhaven 2247 | engine: wallhaven 2248 | # api_key: abcdefghijklmnopqrstuvwxyz 2249 | shortcut: wh 2250 | 2251 | # wikimini: online encyclopedia for children 2252 | # The fulltext and title parameters are necessary for Wikimini because 2253 | # sometimes it will not show 
the results and will redirect instead 2254 | - name: wikimini 2255 | engine: xpath 2256 | shortcut: wkmn 2257 | search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search 2258 | url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href 2259 | title_xpath: //li//div[@class="mw-search-result-heading"]/a 2260 | content_xpath: //li/div[@class="searchresult"] 2261 | categories: general 2262 | disabled: true 2263 | about: 2264 | website: https://wikimini.org/ 2265 | wikidata_id: Q3568032 2266 | use_official_api: false 2267 | require_api_key: false 2268 | results: HTML 2269 | language: fr 2270 | 2271 | - name: wttr.in 2272 | engine: wttr 2273 | shortcut: wttr 2274 | timeout: 9.0 2275 | 2276 | - name: yummly 2277 | engine: yummly 2278 | shortcut: yum 2279 | disabled: true 2280 | 2281 | - name: brave 2282 | engine: brave 2283 | shortcut: br 2284 | time_range_support: true 2285 | paging: true 2286 | categories: [general, web] 2287 | brave_category: search 2288 | # brave_spellcheck: true 2289 | 2290 | - name: brave.images 2291 | engine: brave 2292 | network: brave 2293 | shortcut: brimg 2294 | categories: [images, web] 2295 | brave_category: images 2296 | 2297 | - name: brave.videos 2298 | engine: brave 2299 | network: brave 2300 | shortcut: brvid 2301 | categories: [videos, web] 2302 | brave_category: videos 2303 | 2304 | - name: brave.news 2305 | engine: brave 2306 | network: brave 2307 | shortcut: brnews 2308 | categories: news 2309 | brave_category: news 2310 | 2311 | # - name: brave.goggles 2312 | # engine: brave 2313 | # network: brave 2314 | # shortcut: brgog 2315 | # time_range_support: true 2316 | # paging: true 2317 | # categories: [general, web] 2318 | # brave_category: goggles 2319 | # Goggles: # required! 
This should be a URL ending in .goggle 2320 | 2321 | - name: lib.rs 2322 | shortcut: lrs 2323 | engine: lib_rs 2324 | disabled: true 2325 | 2326 | - name: sourcehut 2327 | shortcut: srht 2328 | engine: xpath 2329 | paging: true 2330 | search_url: https://sr.ht/projects?page={pageno}&search={query} 2331 | results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] 2332 | url_xpath: ./h4/a[2]/@href 2333 | title_xpath: ./h4/a[2] 2334 | content_xpath: ./p 2335 | first_page_num: 1 2336 | categories: [it, repos] 2337 | disabled: true 2338 | about: 2339 | website: https://sr.ht 2340 | wikidata_id: Q78514485 2341 | official_api_documentation: https://man.sr.ht/ 2342 | use_official_api: false 2343 | require_api_key: false 2344 | results: HTML 2345 | 2346 | - name: goo 2347 | shortcut: goo 2348 | engine: xpath 2349 | paging: true 2350 | search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 2351 | url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href 2352 | title_xpath: //div[@class="result"]/p[@class='title fsL1']/a 2353 | content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p 2354 | first_page_num: 0 2355 | categories: [general, web] 2356 | disabled: true 2357 | timeout: 4.0 2358 | about: 2359 | website: https://search.goo.ne.jp 2360 | wikidata_id: Q249044 2361 | use_official_api: false 2362 | require_api_key: false 2363 | results: HTML 2364 | language: ja 2365 | 2366 | - name: bt4g 2367 | engine: bt4g 2368 | shortcut: bt4g 2369 | 2370 | - name: pkg.go.dev 2371 | engine: pkg_go_dev 2372 | shortcut: pgo 2373 | disabled: true 2374 | 2375 | # The Doku engine lets you access any DokuWiki instance: 2376 | # a public one or a private/corporate one. 2377 | # - name: ubuntuwiki 2378 | # engine: doku 2379 | # shortcut: uw 2380 | # base_url: 'https://doc.ubuntu-fr.org' 2381 | 2382 | # Be careful when enabling this engine if you are 2383 | # running a public instance. Do not expose any sensitive 2384 | # information. 
You can restrict access by configuring a list 2385 | # of access tokens under tokens. 2386 | # - name: git grep 2387 | # engine: command 2388 | # command: ['git', 'grep', '{{QUERY}}'] 2389 | # shortcut: gg 2390 | # tokens: [] 2391 | # disabled: true 2392 | # delimiter: 2393 | # chars: ':' 2394 | # keys: ['filepath', 'code'] 2395 | 2396 | # Be careful when enabling this engine if you are 2397 | # running a public instance. Do not expose any sensitive 2398 | # information. You can restrict access by configuring a list 2399 | # of access tokens under tokens. 2400 | # - name: locate 2401 | # engine: command 2402 | # command: ['locate', '{{QUERY}}'] 2403 | # shortcut: loc 2404 | # tokens: [] 2405 | # disabled: true 2406 | # delimiter: 2407 | # chars: ' ' 2408 | # keys: ['line'] 2409 | 2410 | # Be careful when enabling this engine if you are 2411 | # running a public instance. Do not expose any sensitive 2412 | # information. You can restrict access by configuring a list 2413 | # of access tokens under tokens. 2414 | # - name: find 2415 | # engine: command 2416 | # command: ['find', '.', '-name', '{{QUERY}}'] 2417 | # query_type: path 2418 | # shortcut: fnd 2419 | # tokens: [] 2420 | # disabled: true 2421 | # delimiter: 2422 | # chars: ' ' 2423 | # keys: ['line'] 2424 | 2425 | # Be careful when enabling this engine if you are 2426 | # running a public instance. Do not expose any sensitive 2427 | # information. You can restrict access by configuring a list 2428 | # of access tokens under tokens. 2429 | # - name: pattern search in files 2430 | # engine: command 2431 | # command: ['fgrep', '{{QUERY}}'] 2432 | # shortcut: fgr 2433 | # tokens: [] 2434 | # disabled: true 2435 | # delimiter: 2436 | # chars: ' ' 2437 | # keys: ['line'] 2438 | 2439 | # Be careful when enabling this engine if you are 2440 | # running a public instance. Do not expose any sensitive 2441 | # information. You can restrict access by configuring a list 2442 | # of access tokens under tokens. 
2443 | # - name: regex search in files 2444 | # engine: command 2445 | # command: ['grep', '{{QUERY}}'] 2446 | # shortcut: gr 2447 | # tokens: [] 2448 | # disabled: true 2449 | # delimiter: 2450 | # chars: ' ' 2451 | # keys: ['line'] 2452 | 2453 | doi_resolvers: 2454 | oadoi.org: 'https://oadoi.org/' 2455 | doi.org: 'https://doi.org/' 2456 | doai.io: 'https://dissem.in/' 2457 | sci-hub.se: 'https://sci-hub.se/' 2458 | sci-hub.st: 'https://sci-hub.st/' 2459 | sci-hub.ru: 'https://sci-hub.ru/' 2460 | 2461 | default_doi_resolver: 'oadoi.org' 2462 | -------------------------------------------------------------------------------- /searxng/uwsgi.ini: -------------------------------------------------------------------------------- 1 | [uwsgi] 2 | # Who will run the code 3 | uid = searxng 4 | gid = searxng 5 | 6 | # Number of workers (usually CPU count) 7 | # default value: %k (= number of CPU core, see Dockerfile) 8 | workers = %k 9 | 10 | # Number of threads per worker 11 | # default value: 4 (see Dockerfile) 12 | threads = 4 13 | 14 | # The right granted on the created socket 15 | chmod-socket = 666 16 | 17 | # Plugin to use and interpreter config 18 | single-interpreter = true 19 | master = true 20 | plugin = python3 21 | lazy-apps = true 22 | enable-threads = 4 23 | 24 | # Module to import 25 | module = searx.webapp 26 | 27 | # Virtualenv and python path 28 | pythonpath = /usr/local/searxng/ 29 | chdir = /usr/local/searxng/searx/ 30 | 31 | # automatically set processes name to something meaningful 32 | auto-procname = true 33 | 34 | # Disable request logging for privacy 35 | disable-logging = true 36 | log-5xx = true 37 | 38 | # Set the max size of a request (request-body excluded) 39 | buffer-size = 8192 40 | 41 | # No keep alive 42 | # See https://github.com/searx/searx-docker/issues/24 43 | add-header = Connection: close 44 | 45 | # Follow SIGTERM convention 46 | # See https://github.com/searxng/searxng/issues/3427 47 | die-on-term 48 | 49 | # uwsgi serves the 
static files 50 | static-map = /static=/usr/local/searxng/searx/static 51 | # expires set to one day 52 | static-expires = /* 86400 53 | static-gzip-all = True 54 | offload-threads = 4 55 | --------------------------------------------------------------------------------